Spaces:
Running
Running
github-actions
committed on
Commit
·
a0e03be
0
Parent(s):
Deploy Panel Space
Browse files- Dockerfile +14 -0
- README.md +11 -0
- data/kpi_health_check_presets/presets_1.json +301 -0
- data/kpi_health_check_profiles/Profil_1.json +32 -0
- panel_app/convert_to_excel_panel.py +55 -0
- panel_app/kpi_health_check_drilldown_plots.py +360 -0
- panel_app/kpi_health_check_panel.py +0 -0
- panel_app/kpi_health_check_panel_v2.py +0 -0
- panel_app/panel_portal.py +121 -0
- panel_app/panel_v2_backend.py +128 -0
- panel_app/trafic_analysis_panel.py +2459 -0
- physical_db/physical_database.csv +0 -0
- process_kpi/__init__.py +0 -0
- process_kpi/gsm_kpi_requirements.md +47 -0
- process_kpi/kpi_health_check/__init__.py +0 -0
- process_kpi/kpi_health_check/benchmarks.py +42 -0
- process_kpi/kpi_health_check/engine.py +293 -0
- process_kpi/kpi_health_check/engine_v2.py +320 -0
- process_kpi/kpi_health_check/export.py +264 -0
- process_kpi/kpi_health_check/io.py +45 -0
- process_kpi/kpi_health_check/kpi_groups.py +96 -0
- process_kpi/kpi_health_check/multi_rat.py +253 -0
- process_kpi/kpi_health_check/normalization.py +292 -0
- process_kpi/kpi_health_check/presets.py +79 -0
- process_kpi/kpi_health_check/profiles.py +71 -0
- process_kpi/kpi_health_check/rules.py +132 -0
- process_kpi/lte_kpi_requirements.md +46 -0
- process_kpi/process_gsm_capacity.py +719 -0
- process_kpi/process_lcg_capacity.py +286 -0
- process_kpi/process_lte_capacity.py +528 -0
- process_kpi/process_wbts_capacity.py +312 -0
- process_kpi/process_wcel_capacity.py +348 -0
- requirements.txt +10 -0
- utils/azimuth_validation.py +35 -0
- utils/check_sheet_exist.py +90 -0
- utils/config_band.py +156 -0
- utils/convert_to_excel.py +365 -0
- utils/extract_code.py +34 -0
- utils/kml_creator.py +79 -0
- utils/kpi_analysis_utils.py +666 -0
- utils/rnc_bsc_lac_count_chart.py +89 -0
- utils/utils_functions.py +126 -0
- utils/utils_vars.py +243 -0
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Panel portal image for a Hugging Face Space (README: sdk: docker, app_port: 7860).
FROM python:3.11-slim

# Non-root user with UID 1000 — the UID Hugging Face Spaces expects.
RUN useradd -m -u 1000 user
WORKDIR /app

# Copy requirements first so the pip layer is cached across source-only changes.
COPY --chown=user:user requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt

# Application source, owned by the runtime user.
COPY --chown=user:user . /app

USER user
EXPOSE 7860

# Serve the portal entry point; origin "*" is required because the Space's
# public hostname is not known at build time.
CMD ["panel", "serve", "panel_app/panel_portal.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin=*", "--num-procs", "1", "--log-level", "info"]
|
README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: KPI Analysis (Panel)
|
| 3 |
+
emoji: "📊"
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
This Space runs the Panel portal located at `panel_app/panel_portal.py`.
|
data/kpi_health_check_presets/presets_1.json
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "Test1",
|
| 3 |
+
"saved_at": "2025-12-13T13:16:30.212771Z",
|
| 4 |
+
"rules": [
|
| 5 |
+
{
|
| 6 |
+
"RAT": "2G",
|
| 7 |
+
"KPI": "2G_Carried Traffic",
|
| 8 |
+
"direction": "higher_is_better",
|
| 9 |
+
"sla": null
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"RAT": "2G",
|
| 13 |
+
"KPI": "Data CSSR",
|
| 14 |
+
"direction": "higher_is_better",
|
| 15 |
+
"sla": 90.0
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"RAT": "2G",
|
| 19 |
+
"KPI": "FT_2G_SDCCH_Drop_rate_1",
|
| 20 |
+
"direction": "lower_is_better",
|
| 21 |
+
"sla": 2.0
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"RAT": "2G",
|
| 25 |
+
"KPI": "Handover success rate",
|
| 26 |
+
"direction": "higher_is_better",
|
| 27 |
+
"sla": 98.0
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"RAT": "2G",
|
| 31 |
+
"KPI": "PS_UL_Load",
|
| 32 |
+
"direction": "higher_is_better",
|
| 33 |
+
"sla": null
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"RAT": "2G",
|
| 37 |
+
"KPI": "SDCCH real blocking",
|
| 38 |
+
"direction": "lower_is_better",
|
| 39 |
+
"sla": 2.0
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"RAT": "2G",
|
| 43 |
+
"KPI": "TCH availability ratio",
|
| 44 |
+
"direction": "higher_is_better",
|
| 45 |
+
"sla": 98.0
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"RAT": "2G",
|
| 49 |
+
"KPI": "TCH call blocking",
|
| 50 |
+
"direction": "lower_is_better",
|
| 51 |
+
"sla": 2.0
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"RAT": "2G",
|
| 55 |
+
"KPI": "TCH_ABIS_FAIL_CALL (c001084)",
|
| 56 |
+
"direction": "lower_is_better",
|
| 57 |
+
"sla": 10.0
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"RAT": "2G",
|
| 61 |
+
"KPI": "TRAFFIC_PS DL",
|
| 62 |
+
"direction": "higher_is_better",
|
| 63 |
+
"sla": null
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"RAT": "2G",
|
| 67 |
+
"KPI": "Voice CSSR%_",
|
| 68 |
+
"direction": "higher_is_better",
|
| 69 |
+
"sla": 98.0
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"RAT": "2G",
|
| 73 |
+
"KPI": "Voice_DCR_OML",
|
| 74 |
+
"direction": "lower_is_better",
|
| 75 |
+
"sla": 2.0
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"RAT": "3G",
|
| 79 |
+
"KPI": "3G Call Setup Success Rate PS",
|
| 80 |
+
"direction": "higher_is_better",
|
| 81 |
+
"sla": 98.0
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"RAT": "3G",
|
| 85 |
+
"KPI": "3G Drop Call Rate CS",
|
| 86 |
+
"direction": "lower_is_better",
|
| 87 |
+
"sla": 2.0
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"RAT": "3G",
|
| 91 |
+
"KPI": "3G Drop Call Rate - All Data services",
|
| 92 |
+
"direction": "lower_is_better",
|
| 93 |
+
"sla": 2.0
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"RAT": "3G",
|
| 97 |
+
"KPI": "3G HSUPA_USER_THROUGHPUT_BOTH_MT",
|
| 98 |
+
"direction": "higher_is_better",
|
| 99 |
+
"sla": null
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"RAT": "3G",
|
| 103 |
+
"KPI": "Average RTWP",
|
| 104 |
+
"direction": "lower_is_better",
|
| 105 |
+
"sla": -89.0
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"RAT": "3G",
|
| 109 |
+
"KPI": "CS_CALL_RADIO_CONN_LOST (M1006C311)",
|
| 110 |
+
"direction": "lower_is_better",
|
| 111 |
+
"sla": 100.0
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"RAT": "3G",
|
| 115 |
+
"KPI": "Cell Availability, excluding blocked by user state (BLU)",
|
| 116 |
+
"direction": "higher_is_better",
|
| 117 |
+
"sla": 98.0
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"RAT": "3G",
|
| 121 |
+
"KPI": "FT_Soft_HO_Success_Rate_1",
|
| 122 |
+
"direction": "higher_is_better",
|
| 123 |
+
"sla": 98.0
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"RAT": "3G",
|
| 127 |
+
"KPI": "HSDPA congestion rate in Iub",
|
| 128 |
+
"direction": "lower_is_better",
|
| 129 |
+
"sla": 10.0
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"RAT": "3G",
|
| 133 |
+
"KPI": "HSDPA_USER_THROUGHPUT",
|
| 134 |
+
"direction": "higher_is_better",
|
| 135 |
+
"sla": null
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"RAT": "3G",
|
| 139 |
+
"KPI": "IUB_LOSS_CC_FRAME_LOSS_IND (M1022C71)",
|
| 140 |
+
"direction": "lower_is_better",
|
| 141 |
+
"sla": 100.0
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"RAT": "3G",
|
| 145 |
+
"KPI": "Total CS traffic - Erl",
|
| 146 |
+
"direction": "higher_is_better",
|
| 147 |
+
"sla": null
|
| 148 |
+
},
|
| 149 |
+
{
|
| 150 |
+
"RAT": "3G",
|
| 151 |
+
"KPI": "Total_Data_Traffic",
|
| 152 |
+
"direction": "higher_is_better",
|
| 153 |
+
"sla": null
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"RAT": "3G",
|
| 157 |
+
"KPI": "ft_cs_call_setup_success_rate",
|
| 158 |
+
"direction": "higher_is_better",
|
| 159 |
+
"sla": 98.0
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"RAT": "3G",
|
| 163 |
+
"KPI": "ft_hsdpa_call_setup_succ_rate",
|
| 164 |
+
"direction": "higher_is_better",
|
| 165 |
+
"sla": 98.0
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"RAT": "3G",
|
| 169 |
+
"KPI": "ft_hsupa_call_setup_succ_rate",
|
| 170 |
+
"direction": "higher_is_better",
|
| 171 |
+
"sla": 98.0
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"RAT": "LTE",
|
| 175 |
+
"KPI": "% MIMO RI 2",
|
| 176 |
+
"direction": "higher_is_better",
|
| 177 |
+
"sla": 50.0
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"RAT": "LTE",
|
| 181 |
+
"KPI": "4G/LTE CALL SETUP SUCCESS RATE (WITHOUT VOLTE)",
|
| 182 |
+
"direction": "higher_is_better",
|
| 183 |
+
"sla": 98.0
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"RAT": "LTE",
|
| 187 |
+
"KPI": "4G/LTE DL Traffic Volume (GBytes)",
|
| 188 |
+
"direction": "higher_is_better",
|
| 189 |
+
"sla": null
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"RAT": "LTE",
|
| 193 |
+
"KPI": "4G/LTE UL Traffic Volume (GBytes)",
|
| 194 |
+
"direction": "higher_is_better",
|
| 195 |
+
"sla": null
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"RAT": "LTE",
|
| 199 |
+
"KPI": "AVE 4G/LTE DL USER THRPUT (ALL)KBnew",
|
| 200 |
+
"direction": "higher_is_better",
|
| 201 |
+
"sla": null
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"RAT": "LTE",
|
| 205 |
+
"KPI": "AVE 4G/LTE UL USER THRPUT (ALL) (Knew",
|
| 206 |
+
"direction": "higher_is_better",
|
| 207 |
+
"sla": null
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"RAT": "LTE",
|
| 211 |
+
"KPI": "AVG_RTWP_RX_ANT_1 (M8005C306)",
|
| 212 |
+
"direction": "lower_is_better",
|
| 213 |
+
"sla": -800.0
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"RAT": "LTE",
|
| 217 |
+
"KPI": "AVG_RTWP_RX_ANT_2 (M8005C307)",
|
| 218 |
+
"direction": "lower_is_better",
|
| 219 |
+
"sla": -800.0
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"RAT": "LTE",
|
| 223 |
+
"KPI": "Avg RRC conn UE",
|
| 224 |
+
"direction": "higher_is_better",
|
| 225 |
+
"sla": null
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"RAT": "LTE",
|
| 229 |
+
"KPI": "Avg UE distance",
|
| 230 |
+
"direction": "higher_is_better",
|
| 231 |
+
"sla": null,
|
| 232 |
+
"policy": "notify"
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"RAT": "LTE",
|
| 236 |
+
"KPI": "CSFB_V6",
|
| 237 |
+
"direction": "higher_is_better",
|
| 238 |
+
"sla": 98.0
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"RAT": "LTE",
|
| 242 |
+
"KPI": "Call Drop Rate_ 4G New",
|
| 243 |
+
"direction": "lower_is_better",
|
| 244 |
+
"sla": 2.0
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"RAT": "LTE",
|
| 248 |
+
"KPI": "Cell Avail excl BLU",
|
| 249 |
+
"direction": "higher_is_better",
|
| 250 |
+
"sla": 98.0
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"RAT": "LTE",
|
| 254 |
+
"KPI": "E-UTRAN Avg PRB usage per TTI DL",
|
| 255 |
+
"direction": "lower_is_better",
|
| 256 |
+
"sla": 50.0
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"RAT": "LTE",
|
| 260 |
+
"KPI": "E-UTRAN E-RAB stp SR",
|
| 261 |
+
"direction": "higher_is_better",
|
| 262 |
+
"sla": 98.0
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"RAT": "LTE",
|
| 266 |
+
"KPI": "E-UTRAN Intra-Freq HO SR",
|
| 267 |
+
"direction": "higher_is_better",
|
| 268 |
+
"sla": 98.0
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"RAT": "LTE",
|
| 272 |
+
"KPI": "E-UTRAN RRC Paging Discard Ratio",
|
| 273 |
+
"direction": "lower_is_better",
|
| 274 |
+
"sla": 1.0
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"RAT": "LTE",
|
| 278 |
+
"KPI": "Intra eNB HO SR",
|
| 279 |
+
"direction": "higher_is_better",
|
| 280 |
+
"sla": 98.0
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"RAT": "LTE",
|
| 284 |
+
"KPI": "Max PDCP Thr DL (classic eNB)",
|
| 285 |
+
"direction": "higher_is_better",
|
| 286 |
+
"sla": null
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"RAT": "LTE",
|
| 290 |
+
"KPI": "Total E-UTRAN RRC conn stp SR",
|
| 291 |
+
"direction": "higher_is_better",
|
| 292 |
+
"sla": 98.0
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"RAT": "LTE",
|
| 296 |
+
"KPI": "UE-spec log S1 sig conn SR",
|
| 297 |
+
"direction": "higher_is_better",
|
| 298 |
+
"sla": 98.0
|
| 299 |
+
}
|
| 300 |
+
]
|
| 301 |
+
}
|
data/kpi_health_check_profiles/Profil_1.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "Profil_1",
|
| 3 |
+
"saved_at": "2025-12-13T13:16:45.937845Z",
|
| 4 |
+
"config": {
|
| 5 |
+
"analysis_range": [
|
| 6 |
+
null,
|
| 7 |
+
null
|
| 8 |
+
],
|
| 9 |
+
"baseline_days": 30,
|
| 10 |
+
"recent_days": 7,
|
| 11 |
+
"rel_threshold_pct": 10.0,
|
| 12 |
+
"min_consecutive_days": 3,
|
| 13 |
+
"min_criticality": 0,
|
| 14 |
+
"min_anomaly_score": 0,
|
| 15 |
+
"city_filter": "",
|
| 16 |
+
"top_rat_filter": [
|
| 17 |
+
"2G",
|
| 18 |
+
"3G",
|
| 19 |
+
"LTE"
|
| 20 |
+
],
|
| 21 |
+
"top_status_filter": [
|
| 22 |
+
"DEGRADED",
|
| 23 |
+
"PERSISTENT_DEGRADED"
|
| 24 |
+
],
|
| 25 |
+
"preset_selected": "presets_1.json",
|
| 26 |
+
"drilldown": {
|
| 27 |
+
"site_code": 2130,
|
| 28 |
+
"rat": "LTE",
|
| 29 |
+
"kpi": ""
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
}
|
panel_app/convert_to_excel_panel.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import time
|
| 3 |
+
from typing import Iterable, Sequence
|
| 4 |
+
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def write_dfs_to_excel(
    dfs: Sequence[pd.DataFrame],
    sheet_names: Sequence[str],
    index: bool = True,
    profile: dict | None = None,
) -> bytes:
    """Write the given DataFrames to an in-memory XLSX workbook.

    Each entry of *dfs* is written to the sheet at the same position in
    *sheet_names*. Entries that are not DataFrames (e.g. ``None``) are
    replaced by an empty DataFrame so every requested sheet still exists.
    No Streamlit dependency and no heavy formatting, to keep Panel exports
    fast and avoid Streamlit runtime warnings.

    When *profile* is a dict, a per-sheet record (name, rows, cols, write
    seconds) is appended to ``profile["excel_sheets"]`` and the overall
    duration is stored in ``profile["excel_total_seconds"]``.

    Returns the workbook content as raw bytes.
    """
    profiling = profile is not None
    buffer = io.BytesIO()
    start = time.perf_counter() if profiling else 0.0

    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        for frame, sheet in zip(dfs, sheet_names):
            # Always write a valid DataFrame, even if None was passed.
            current = frame if isinstance(frame, pd.DataFrame) else pd.DataFrame()
            sheet_start = time.perf_counter() if profiling else 0.0
            current.to_excel(writer, sheet_name=str(sheet), index=index)
            sheet_end = time.perf_counter() if profiling else 0.0

            if profiling:
                entries = profile.get("excel_sheets")
                if not isinstance(entries, list):
                    entries = []
                    profile["excel_sheets"] = entries
                try:
                    n_rows = int(len(current))
                except Exception:  # noqa: BLE001
                    n_rows = 0
                try:
                    n_cols = int(current.shape[1])
                except Exception:  # noqa: BLE001
                    n_cols = 0
                entries.append(
                    {
                        "name": str(sheet),
                        "rows": n_rows,
                        "cols": n_cols,
                        "seconds": float(sheet_end - sheet_start),
                    }
                )

    if profiling:
        profile["excel_total_seconds"] = float(time.perf_counter() - start)

    return buffer.getvalue()
|
panel_app/kpi_health_check_drilldown_plots.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import timedelta
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import plotly.graph_objects as go
|
| 5 |
+
from plotly.subplots import make_subplots
|
| 6 |
+
|
| 7 |
+
from process_kpi.kpi_health_check.engine import is_bad
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def build_drilldown_plot(
    df: pd.DataFrame,
    kpis: list[str],
    rules_df: pd.DataFrame | None = None,
    highlight_bad_days: bool = True,
    show_sla: bool = True,
    site_code: str | int = "",
    rat: str = "",
    main_kpi: str | None = None,
    baseline_days_n: int = 30,
    recent_days_n: int = 7,
    rel_threshold_pct: float = 10.0,
    normalization: str = "None",
    granularity: str = "Daily",
) -> go.Figure | None:
    """
    Build the drill-down trend plot with native Plotly annotations.

    Row 1 plots one trace per KPI in ``kpis`` (optionally normalized when
    several KPIs are shown), plus an SLA reference line and "Bad days"
    markers for ``main_kpi``. Row 2 is a compact status strip coloring
    each timestamp as baseline / recent OK / recent bad / outside window.

    Parameters
    ----------
    df : one row per timestamp with KPI columns; must contain "date_only"
        (daily) or "period_start" (hourly) as the time column.
    kpis : KPI column names to plot; names absent from ``df`` are ignored.
    rules_df : optional rules table with "KPI", "direction", "sla" and
        optionally "policy" columns. Assumed pre-filtered to the right RAT
        (see note below).
    highlight_bad_days : evaluate ``main_kpi`` against baseline/SLA and
        mark degraded recent points when True.
    show_sla : draw a dotted SLA line (skipped when normalizing, and for
        rules whose policy is "notify").
    site_code, rat : used only in the figure title.
    main_kpi : KPI used for bad-day evaluation; defaults to the first
        valid KPI.
    baseline_days_n, recent_days_n : window lengths for the baseline
        median and the "recent" evaluation period.
    rel_threshold_pct : relative degradation threshold passed to ``is_bad``.
    normalization : "None", "Min-Max" or "Z-score"; applied per trace only
        when more than one KPI is plotted.
    granularity : "Daily" or "Hourly"; drives time-column choice and
        window arithmetic.

    Returns the Plotly figure, or ``None`` when there is nothing to plot.
    """
    # Nothing to draw without data or requested KPIs.
    if df is None or df.empty or not kpis:
        return None

    # Filter columns that exist
    valid_kpis = [k for k in kpis if k in df.columns]
    if not valid_kpis:
        return None

    # "hour"/"h..." prefixes select hourly mode; anything else is daily.
    g = str(granularity or "Daily").strip().lower()
    is_hourly = g.startswith("hour") or g.startswith("h")
    time_col = (
        "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
    )

    plot_df = df.sort_values(time_col).copy()

    # Best-effort datetime coercion; plotting still works with the raw
    # values if conversion fails.
    try:
        plot_df[time_col] = pd.to_datetime(plot_df[time_col])
    except Exception:
        pass

    if main_kpi is None and valid_kpis:
        main_kpi = valid_kpis[0]

    title_text = f"{rat} - Site {site_code}"
    # If single KPI, be explicit in title
    if len(valid_kpis) == 1:
        title_text = f"{rat} - Site {site_code} - {valid_kpis[0]}"

    # Subplot for Timeline (Streak) - Row 2
    # Row 1: Main Trend
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        row_heights=[0.85, 0.15],
        subplot_titles=(title_text, "Status Check"),
    )

    # Normalization only makes sense when comparing several KPIs on one axis.
    norm_mode = str(normalization or "None").strip()
    do_norm = (norm_mode != "None") and (len(valid_kpis) > 1)

    # Markers clutter the plot past a few traces; drop them beyond 3 KPIs.
    n_kpis = len(valid_kpis)
    trace_mode = "lines+markers" if n_kpis <= 3 else "lines"
    marker_size = 6 if n_kpis <= 3 else 0

    for kpi in valid_kpis:
        # Data preparation
        x_data = plot_df[time_col]
        y_data = pd.to_numeric(plot_df[kpi], errors="coerce")
        if do_norm:
            if norm_mode == "Min-Max":
                vmin = y_data.min(skipna=True)
                vmax = y_data.max(skipna=True)
                # Constant series collapse to 0 to avoid division by zero.
                if pd.notna(vmin) and pd.notna(vmax) and float(vmax) != float(vmin):
                    y_data = (y_data - vmin) / (vmax - vmin)
                else:
                    y_data = y_data * 0.0
            elif norm_mode == "Z-score":
                mu = y_data.mean(skipna=True)
                sd = y_data.std(skipna=True)
                if pd.notna(sd) and float(sd) != 0.0:
                    y_data = (y_data - mu) / sd
                else:
                    y_data = y_data * 0.0

        # Add Trace
        fig.add_trace(
            go.Scatter(
                x=x_data,
                y=y_data,
                mode=trace_mode,
                name=kpi,
                legendgroup=kpi,  # Allows grouping logic if needed
                marker=dict(size=marker_size) if marker_size else None,
            ),
            row=1,
            col=1,
        )

        # Add SLA line if available
        if show_sla and rules_df is not None:
            try:
                # Normalized axes make an absolute SLA value meaningless.
                if do_norm:
                    continue
                # Find rule for this KPI
                # Note: This implies rules_df is filtered for the correct RAT
                rule = rules_df[rules_df["KPI"] == kpi]
                if not rule.empty:
                    pol = (
                        str(rule.iloc[0].get("policy", "enforce") or "enforce")
                        .strip()
                        .lower()
                    )
                    # "notify" rules are informational: no SLA line drawn.
                    if pol == "notify":
                        continue
                    # With multiple KPIs, only the main KPI gets its SLA line.
                    if len(valid_kpis) > 1 and str(kpi) != str(main_kpi):
                        continue
                    sla_val = pd.to_numeric(rule.iloc[0].get("sla"), errors="coerce")
                    if pd.notna(sla_val):
                        fig.add_hline(
                            y=sla_val,
                            line_dash="dot",
                            line_color="red",
                            annotation_text=f"SLA {kpi}",
                            annotation_position="bottom right",
                            row=1,
                            col=1,
                        )
            except Exception:
                pass

    # Bad-day evaluation + row-2 status strip for the main KPI. Wrapped in a
    # broad try so a malformed rules table never breaks the base plot.
    try:
        if highlight_bad_days and main_kpi and main_kpi in plot_df.columns:
            # Defaults used when no matching rule is found.
            direction = "higher_is_better"
            policy = "enforce"
            sla_eval = None
            if (
                rules_df is not None
                and isinstance(rules_df, pd.DataFrame)
                and not rules_df.empty
            ):
                rule = rules_df[rules_df["KPI"] == str(main_kpi)]
                if not rule.empty:
                    direction = str(
                        rule.iloc[0].get("direction", direction) or direction
                    )
                    policy = (
                        str(rule.iloc[0].get("policy", policy) or policy)
                        .strip()
                        .lower()
                    )
                    # SLA participates in evaluation only for enforced rules.
                    if policy != "notify":
                        sla_val = pd.to_numeric(
                            rule.iloc[0].get("sla"), errors="coerce"
                        )
                        if pd.notna(sla_val):
                            try:
                                sla_eval = float(sla_val)
                            except Exception:
                                sla_eval = None

            # Window layout (inclusive): [bs .. be] baseline, then [rs .. end].
            end_dt = pd.to_datetime(plot_df[time_col]).max()
            if is_hourly:
                rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
                be = rs - timedelta(hours=1)
                bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)
            else:
                rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
                be = rs - timedelta(days=1)
                bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)

            dts = pd.to_datetime(plot_df[time_col])
            baseline_mask = (dts >= bs) & (dts <= be)
            recent_mask = (dts >= rs) & (dts <= end_dt)
            # Median over the baseline window; robust to outliers.
            baseline_val = pd.to_numeric(
                plot_df.loc[baseline_mask, str(main_kpi)], errors="coerce"
            ).median()
            baseline_val = float(baseline_val) if pd.notna(baseline_val) else None

            # Evaluate every point; is_bad is the project's degradation test
            # (value vs baseline/direction/threshold/SLA — see engine module).
            vals = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
            bad_flags = [
                is_bad(
                    float(v) if pd.notna(v) else None,
                    baseline_val,
                    direction,
                    float(rel_threshold_pct),
                    sla_eval,
                )
                for v in vals.tolist()
            ]

            # Only bad points inside the recent window get highlighted.
            bad_recent = [bool(b) and bool(r) for b, r in zip(bad_flags, recent_mask)]

            baseline_color = "#bdbdbd"
            ok_color = "#1565c0"
            # Amber for notify-only rules, red for enforced ones.
            bad_color = "#f9a825" if policy == "notify" else "#e53935"

            # One colored square per timestamp for the row-2 status strip.
            colors = []
            hover_txt = []
            for is_b, is_base, is_recent in zip(bad_flags, baseline_mask, recent_mask):
                if bool(is_base):
                    colors.append(baseline_color)
                    hover_txt.append(f"BASELINE ({main_kpi})")
                elif bool(is_recent) and bool(is_b):
                    colors.append(bad_color)
                    hover_txt.append(f"RECENT BAD ({main_kpi})")
                elif bool(is_recent):
                    colors.append(ok_color)
                    hover_txt.append(f"RECENT OK ({main_kpi})")
                else:
                    colors.append("#e0e0e0")
                    hover_txt.append(f"OUTSIDE WINDOW ({main_kpi})")
            fig.add_trace(
                go.Scatter(
                    x=plot_df[time_col],
                    y=[0] * len(plot_df),
                    mode="markers",
                    marker=dict(symbol="square", size=10, color=colors),
                    showlegend=False,
                    hovertext=hover_txt,
                    hoverinfo="text",
                ),
                row=2,
                col=1,
            )

            # Overlay "Bad days" markers on the main trace; y-values must be
            # re-normalized the same way as the plotted trace to line up.
            try:
                y_main = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
                if do_norm:
                    if norm_mode == "Min-Max":
                        vmin = y_main.min(skipna=True)
                        vmax = y_main.max(skipna=True)
                        if (
                            pd.notna(vmin)
                            and pd.notna(vmax)
                            and float(vmax) != float(vmin)
                        ):
                            y_main = (y_main - vmin) / (vmax - vmin)
                        else:
                            y_main = y_main * 0.0
                    elif norm_mode == "Z-score":
                        mu = y_main.mean(skipna=True)
                        sd = y_main.std(skipna=True)
                        if pd.notna(sd) and float(sd) != 0.0:
                            y_main = (y_main - mu) / sd
                        else:
                            y_main = y_main * 0.0

                idx_bad = [i for i, b in enumerate(bad_recent) if bool(b)]
                if idx_bad:
                    fig.add_trace(
                        go.Scatter(
                            x=[plot_df[time_col].iloc[i] for i in idx_bad],
                            y=[y_main.iloc[i] for i in idx_bad],
                            mode="markers",
                            marker=dict(size=10, color=bad_color, symbol="circle"),
                            name="Bad days",
                            showlegend=(n_kpis <= 3),
                        ),
                        row=1,
                        col=1,
                    )
            except Exception:
                pass
    except Exception:
        pass

    # Keep the shared x-axis alive on row 2 even when highlighting is off,
    # via an invisible placeholder trace.
    if not plot_df.empty and not highlight_bad_days:
        fig.add_trace(
            go.Scatter(
                x=plot_df[time_col],
                y=[0] * len(plot_df),
                mode="markers",
                opacity=0,
                showlegend=False,
                hoverinfo="skip",
            ),
            row=2,
            col=1,
        )

    fig.update_layout(
        template="plotly_white",
        height=500,
        margin=dict(l=50, r=50, t=50, b=50),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        hovermode="x unified",
    )

    # Axis cosmetics; any failure here should not lose the figure.
    try:
        # Show every single date tick only for reasonably short daily ranges.
        force_all_dates = False
        try:
            x_min = pd.to_datetime(plot_df[time_col]).min()
            x_max = pd.to_datetime(plot_df[time_col]).max()
            span_days = int((x_max - x_min).days) + 1
            n_dates = int(pd.to_datetime(plot_df[time_col]).nunique())
            force_all_dates = (span_days <= 200) and (n_dates <= 200)
        except Exception:
            force_all_dates = False

        if do_norm:
            fig.update_yaxes(title_text=f"Normalized ({norm_mode})", row=1, col=1)
        else:
            fig.update_yaxes(title_text="Value", row=1, col=1)
        # The status strip has no meaningful y scale; hide all its chrome.
        fig.update_yaxes(
            showticklabels=False,
            showgrid=False,
            zeroline=False,
            range=[-1, 1],
            row=2,
            col=1,
        )

        if force_all_dates and not is_hourly:
            fig.update_xaxes(
                tickmode="linear",
                dtick=86400000,  # one day in milliseconds
                tickformat="%d-%b",
                tickangle=-90,
                tickfont=dict(size=10),
                automargin=True,
                ticks="outside",
                ticklen=6,
                showgrid=True,
                row=2,
                col=1,
            )
        else:
            # Zoom-dependent tick labels: finer formats at smaller dtick ranges.
            fig.update_xaxes(
                tickangle=-45,
                automargin=True,
                ticks="outside",
                ticklen=6,
                showgrid=True,
                tickformatstops=[
                    {
                        "dtickrange": [None, 86400000],
                        "value": "%d-%b\n%H:%M" if is_hourly else "%d-%b\n%Y",
                    },
                    {"dtickrange": [86400000, 7 * 86400000], "value": "%d-%b"},
                    {"dtickrange": [7 * 86400000, "M1"], "value": "%d-%b"},
                    {"dtickrange": ["M1", "M12"], "value": "%b\n%Y"},
                    {"dtickrange": ["M12", None], "value": "%Y"},
                ],
                row=2,
                col=1,
            )
    except Exception:
        pass

    return fig
|
panel_app/kpi_health_check_panel.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
panel_app/kpi_health_check_panel_v2.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
panel_app/panel_portal.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import panel as pn
|
| 5 |
+
|
| 6 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 7 |
+
if ROOT_DIR not in sys.path:
|
| 8 |
+
sys.path.insert(0, ROOT_DIR)
|
| 9 |
+
|
| 10 |
+
pn.extension("plotly", "tabulator")
|
| 11 |
+
|
| 12 |
+
# Import pages (kept as modules, not nested templates)
|
| 13 |
+
from panel_app import (
|
| 14 |
+
kpi_health_check_panel,
|
| 15 |
+
kpi_health_check_panel_v2,
|
| 16 |
+
trafic_analysis_panel,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
PAGES = {
|
| 20 |
+
"📊 Global Traffic Analysis": {
|
| 21 |
+
"get_components": trafic_analysis_panel.get_page_components,
|
| 22 |
+
"description": "Analyse trafic multi-RAT + cartes + exports.",
|
| 23 |
+
},
|
| 24 |
+
"📈 KPI Health Check": {
|
| 25 |
+
"get_components": kpi_health_check_panel.get_page_components,
|
| 26 |
+
"description": "Détection KPI dégradés/persistants/résolus + drill-down + export.",
|
| 27 |
+
},
|
| 28 |
+
"⚡ KPI Health Check (V2)": {
|
| 29 |
+
"get_components": kpi_health_check_panel_v2.get_page_components,
|
| 30 |
+
"description": "Version optimisée (cache disque + moteur health-check vectorisé).",
|
| 31 |
+
},
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
HOME_PAGE = "🏠 Gallery"
|
| 35 |
+
|
| 36 |
+
page_sidebar_container = pn.Column(sizing_mode="stretch_width")
|
| 37 |
+
page_main_container = pn.Column(sizing_mode="stretch_both")
|
| 38 |
+
|
| 39 |
+
page_title = pn.pane.Markdown("", sizing_mode="stretch_width")
|
| 40 |
+
back_button = pn.widgets.Button(
|
| 41 |
+
name="← Back to gallery",
|
| 42 |
+
button_type="primary",
|
| 43 |
+
width=180,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
home_button = pn.widgets.Button(
|
| 47 |
+
name=HOME_PAGE,
|
| 48 |
+
button_type="default",
|
| 49 |
+
width_policy="max",
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _load_page(page_name: str) -> None:
|
| 54 |
+
if page_name == HOME_PAGE:
|
| 55 |
+
page_title.object = "## Applications"
|
| 56 |
+
|
| 57 |
+
tiles = []
|
| 58 |
+
for title, meta in PAGES.items():
|
| 59 |
+
btn = pn.widgets.Button(name="Open", button_type="primary", width=120)
|
| 60 |
+
btn.on_click(lambda e, t=title: _load_page(t))
|
| 61 |
+
|
| 62 |
+
tile = pn.Column(
|
| 63 |
+
pn.pane.Markdown(f"### {title}\n\n{meta.get('description', '')}"),
|
| 64 |
+
btn,
|
| 65 |
+
sizing_mode="stretch_width",
|
| 66 |
+
margin=(10, 10, 10, 10),
|
| 67 |
+
)
|
| 68 |
+
tiles.append(tile)
|
| 69 |
+
|
| 70 |
+
gallery = pn.GridBox(*tiles, ncols=2, sizing_mode="stretch_width")
|
| 71 |
+
page_sidebar_container.objects = [
|
| 72 |
+
pn.pane.Markdown(
|
| 73 |
+
"""### Bienvenue\n\nChoisis une application dans la gallery."""
|
| 74 |
+
)
|
| 75 |
+
]
|
| 76 |
+
page_main_container.objects = [page_title, gallery]
|
| 77 |
+
return
|
| 78 |
+
|
| 79 |
+
meta = PAGES.get(page_name)
|
| 80 |
+
if meta is None:
|
| 81 |
+
page_sidebar_container.objects = [
|
| 82 |
+
pn.pane.Alert("Unknown page", alert_type="danger")
|
| 83 |
+
]
|
| 84 |
+
page_main_container.objects = []
|
| 85 |
+
return
|
| 86 |
+
|
| 87 |
+
sidebar, main = meta["get_components"]()
|
| 88 |
+
page_title.object = f"## {page_name}"
|
| 89 |
+
page_sidebar_container.objects = [sidebar]
|
| 90 |
+
page_main_container.objects = [
|
| 91 |
+
pn.Row(back_button, pn.Spacer(), sizing_mode="stretch_width"),
|
| 92 |
+
page_title,
|
| 93 |
+
main,
|
| 94 |
+
]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
template = pn.template.MaterialTemplate(title="OML DB - Portal")
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _go_home(event=None) -> None:
|
| 101 |
+
_load_page(HOME_PAGE)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
back_button.on_click(_go_home)
|
| 105 |
+
home_button.on_click(_go_home)
|
| 106 |
+
|
| 107 |
+
_load_page(HOME_PAGE)
|
| 108 |
+
|
| 109 |
+
template.sidebar.append(
|
| 110 |
+
pn.Column(
|
| 111 |
+
pn.pane.Markdown("## Navigation"),
|
| 112 |
+
home_button,
|
| 113 |
+
pn.layout.Divider(),
|
| 114 |
+
page_sidebar_container,
|
| 115 |
+
sizing_mode="stretch_width",
|
| 116 |
+
)
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
template.main.append(page_main_container)
|
| 120 |
+
|
| 121 |
+
template.servable()
|
panel_app/panel_v2_backend.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import os
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _project_root() -> str:
|
| 9 |
+
return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def cache_root() -> str:
|
| 13 |
+
# Priority: env var > /tmp (for HF Spaces/containers) > project root (local dev)
|
| 14 |
+
env_cache = os.environ.get("CACHE_DIR")
|
| 15 |
+
if env_cache:
|
| 16 |
+
path = os.path.join(env_cache, "panel_app_v2")
|
| 17 |
+
elif os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
|
| 18 |
+
# On Hugging Face Spaces and Linux containers, /tmp is always writable
|
| 19 |
+
path = os.path.join("/tmp", "panel_app_v2_cache")
|
| 20 |
+
else:
|
| 21 |
+
# Fallback to project root for local development
|
| 22 |
+
root = _project_root()
|
| 23 |
+
path = os.path.join(root, ".cache", "panel_app_v2")
|
| 24 |
+
os.makedirs(path, exist_ok=True)
|
| 25 |
+
return path
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _safe_str(value: object) -> str:
|
| 29 |
+
try:
|
| 30 |
+
return str(value or "")
|
| 31 |
+
except Exception:
|
| 32 |
+
return ""
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
|
| 36 |
+
h = hashlib.blake2b(digest_size=16)
|
| 37 |
+
if file_bytes:
|
| 38 |
+
h.update(file_bytes)
|
| 39 |
+
name = _safe_str(filename)
|
| 40 |
+
if name:
|
| 41 |
+
h.update(name.encode("utf-8", errors="ignore"))
|
| 42 |
+
ex = _safe_str(extra)
|
| 43 |
+
if ex:
|
| 44 |
+
h.update(ex.encode("utf-8", errors="ignore"))
|
| 45 |
+
return h.hexdigest()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _has_pyarrow() -> bool:
|
| 49 |
+
try:
|
| 50 |
+
import pyarrow # noqa: F401
|
| 51 |
+
|
| 52 |
+
return True
|
| 53 |
+
except Exception:
|
| 54 |
+
return False
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _has_duckdb() -> bool:
|
| 58 |
+
try:
|
| 59 |
+
import duckdb # noqa: F401
|
| 60 |
+
|
| 61 |
+
return True
|
| 62 |
+
except Exception:
|
| 63 |
+
return False
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
|
| 67 |
+
if _has_pyarrow():
|
| 68 |
+
path = path_no_ext + ".parquet"
|
| 69 |
+
df.to_parquet(path, index=False)
|
| 70 |
+
return path
|
| 71 |
+
path = path_no_ext + ".pkl"
|
| 72 |
+
df.to_pickle(path)
|
| 73 |
+
return path
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def read_table(path: str) -> pd.DataFrame:
|
| 77 |
+
if not path or not os.path.exists(path):
|
| 78 |
+
return pd.DataFrame()
|
| 79 |
+
p = str(path).lower()
|
| 80 |
+
if p.endswith(".parquet"):
|
| 81 |
+
return pd.read_parquet(path)
|
| 82 |
+
return pd.read_pickle(path)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@dataclass(frozen=True)
|
| 86 |
+
class CachedDataset:
|
| 87 |
+
dataset_id: str
|
| 88 |
+
rat: str
|
| 89 |
+
granularity: str
|
| 90 |
+
|
| 91 |
+
def base_dir(self) -> str:
|
| 92 |
+
return os.path.join(
|
| 93 |
+
cache_root(),
|
| 94 |
+
self.dataset_id,
|
| 95 |
+
f"rat={self.rat}",
|
| 96 |
+
f"granularity={self.granularity}",
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
def daily_table_base(self) -> str:
|
| 100 |
+
return os.path.join(self.base_dir(), "daily")
|
| 101 |
+
|
| 102 |
+
def meta_path(self) -> str:
|
| 103 |
+
return os.path.join(self.base_dir(), "meta.json")
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
|
| 107 |
+
base = dataset.daily_table_base()
|
| 108 |
+
candidates = [base + ".parquet", base + ".pkl"]
|
| 109 |
+
for p in candidates:
|
| 110 |
+
if os.path.exists(p):
|
| 111 |
+
try:
|
| 112 |
+
df = read_table(p)
|
| 113 |
+
return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
|
| 114 |
+
except Exception:
|
| 115 |
+
return pd.DataFrame()
|
| 116 |
+
return None
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
|
| 120 |
+
os.makedirs(dataset.base_dir(), exist_ok=True)
|
| 121 |
+
return write_table(daily, dataset.daily_table_base())
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def ensure_duckdb_available() -> None:
|
| 125 |
+
if not _has_duckdb():
|
| 126 |
+
raise RuntimeError(
|
| 127 |
+
"DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
|
| 128 |
+
)
|
panel_app/trafic_analysis_panel.py
ADDED
|
@@ -0,0 +1,2459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import zipfile
|
| 5 |
+
from datetime import date, datetime, timedelta
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import panel as pn
|
| 10 |
+
import plotly.express as px
|
| 11 |
+
|
| 12 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
+
if ROOT_DIR not in sys.path:
|
| 14 |
+
sys.path.insert(0, ROOT_DIR)
|
| 15 |
+
|
| 16 |
+
from panel_app.convert_to_excel_panel import write_dfs_to_excel
|
| 17 |
+
from utils.utils_vars import get_physical_db
|
| 18 |
+
|
| 19 |
+
pn.extension(
|
| 20 |
+
"plotly",
|
| 21 |
+
"tabulator",
|
| 22 |
+
raw_css=[
|
| 23 |
+
":fullscreen { background-color: white; overflow: auto; }",
|
| 24 |
+
"::backdrop { background-color: white; }",
|
| 25 |
+
".plot-fullscreen-wrapper:fullscreen { padding: 20px; display: flex; flex-direction: column; }",
|
| 26 |
+
".plot-fullscreen-wrapper:fullscreen > * { height: 100% !important; width: 100% !important; }",
|
| 27 |
+
],
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def read_fileinput_to_df(file_input: pn.widgets.FileInput) -> pd.DataFrame | None:
|
| 32 |
+
"""Read a Panel FileInput (ZIP or CSV) into a DataFrame.
|
| 33 |
+
|
| 34 |
+
Returns None if no file is provided.
|
| 35 |
+
"""
|
| 36 |
+
if file_input is None or not file_input.value:
|
| 37 |
+
return None
|
| 38 |
+
|
| 39 |
+
filename = (file_input.filename or "").lower()
|
| 40 |
+
data = io.BytesIO(file_input.value)
|
| 41 |
+
|
| 42 |
+
if filename.endswith(".zip"):
|
| 43 |
+
with zipfile.ZipFile(data) as z:
|
| 44 |
+
csv_files = [f for f in z.namelist() if f.lower().endswith(".csv")]
|
| 45 |
+
if not csv_files:
|
| 46 |
+
raise ValueError("No CSV file found in the ZIP archive")
|
| 47 |
+
with z.open(csv_files[0]) as f:
|
| 48 |
+
return pd.read_csv(f, encoding="latin1", sep=";", low_memory=False)
|
| 49 |
+
elif filename.endswith(".csv"):
|
| 50 |
+
return pd.read_csv(data, encoding="latin1", sep=";", low_memory=False)
|
| 51 |
+
else:
|
| 52 |
+
raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def extract_code(name):
|
| 56 |
+
name = name.replace(" ", "_") if isinstance(name, str) else None
|
| 57 |
+
if name and len(name) >= 10:
|
| 58 |
+
try:
|
| 59 |
+
return int(name.split("_")[0])
|
| 60 |
+
except ValueError:
|
| 61 |
+
return None
|
| 62 |
+
return None
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
|
| 66 |
+
df = df[df["BCF name"].str.len() >= 10].copy()
|
| 67 |
+
df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
|
| 68 |
+
df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
|
| 69 |
+
df["code"] = df["BCF name"].apply(extract_code)
|
| 70 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 71 |
+
df = df[df["code"].notna()]
|
| 72 |
+
df["code"] = df["code"].astype(int)
|
| 73 |
+
date_format = (
|
| 74 |
+
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 75 |
+
)
|
| 76 |
+
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
|
| 77 |
+
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
|
| 78 |
+
|
| 79 |
+
if "TCH availability ratio" in df.columns:
|
| 80 |
+
df["2g_tch_avail"] = pd.to_numeric(
|
| 81 |
+
df["TCH availability ratio"], errors="coerce"
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
agg_dict = {
|
| 85 |
+
"2g_data_trafic": "sum",
|
| 86 |
+
"2g_voice_trafic": "sum",
|
| 87 |
+
}
|
| 88 |
+
if "2g_tch_avail" in df.columns:
|
| 89 |
+
agg_dict["2g_tch_avail"] = "mean"
|
| 90 |
+
|
| 91 |
+
df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
|
| 92 |
+
return df
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
|
| 96 |
+
df = df[df["WBTS name"].str.len() >= 10].copy()
|
| 97 |
+
df["code"] = df["WBTS name"].apply(extract_code)
|
| 98 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 99 |
+
df = df[df["code"].notna()]
|
| 100 |
+
df["code"] = df["code"].astype(int)
|
| 101 |
+
date_format = (
|
| 102 |
+
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 103 |
+
)
|
| 104 |
+
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
|
| 105 |
+
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
|
| 106 |
+
df.rename(
|
| 107 |
+
columns={
|
| 108 |
+
"Total CS traffic - Erl": "3g_voice_trafic",
|
| 109 |
+
"Total_Data_Traffic": "3g_data_trafic",
|
| 110 |
+
},
|
| 111 |
+
inplace=True,
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
kpi_col = None
|
| 115 |
+
for col in df.columns:
|
| 116 |
+
if "cell availability" in str(col).lower():
|
| 117 |
+
kpi_col = col
|
| 118 |
+
break
|
| 119 |
+
|
| 120 |
+
if kpi_col is not None:
|
| 121 |
+
df["3g_cell_avail"] = pd.to_numeric(df[kpi_col], errors="coerce")
|
| 122 |
+
|
| 123 |
+
agg_dict = {
|
| 124 |
+
"3g_voice_trafic": "sum",
|
| 125 |
+
"3g_data_trafic": "sum",
|
| 126 |
+
}
|
| 127 |
+
if "3g_cell_avail" in df.columns:
|
| 128 |
+
agg_dict["3g_cell_avail"] = "mean"
|
| 129 |
+
|
| 130 |
+
df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
|
| 131 |
+
return df
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
|
| 135 |
+
df = df[df["LNBTS name"].str.len() >= 10].copy()
|
| 136 |
+
df["lte_data_trafic"] = (
|
| 137 |
+
df["4G/LTE DL Traffic Volume (GBytes)"]
|
| 138 |
+
+ df["4G/LTE UL Traffic Volume (GBytes)"]
|
| 139 |
+
)
|
| 140 |
+
df["code"] = df["LNBTS name"].apply(extract_code)
|
| 141 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 142 |
+
df = df[df["code"].notna()]
|
| 143 |
+
df["code"] = df["code"].astype(int)
|
| 144 |
+
date_format = (
|
| 145 |
+
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 146 |
+
)
|
| 147 |
+
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
|
| 148 |
+
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
|
| 149 |
+
if "Cell Avail excl BLU" in df.columns:
|
| 150 |
+
df["lte_cell_avail"] = pd.to_numeric(df["Cell Avail excl BLU"], errors="coerce")
|
| 151 |
+
|
| 152 |
+
agg_dict = {"lte_data_trafic": "sum"}
|
| 153 |
+
if "lte_cell_avail" in df.columns:
|
| 154 |
+
agg_dict["lte_cell_avail"] = "mean"
|
| 155 |
+
|
| 156 |
+
df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
|
| 157 |
+
return df
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
    """Merge the per-RAT daily frames and build pre/post comparison pivots.

    Parameters
    ----------
    df_2g, df_3g, df_lte : per-RAT frames keyed by (date, ID, code), as
        produced by the ``preprocess_*`` helpers.
    pre_range, post_range, last_period_range : (start, end) pairs accepted by
        ``pd.to_datetime``; both bounds are inclusive.

    Returns
    -------
    (df, last_period, sum_pivot, avg_pivot):
        df          -- full merged frame with traffic totals, site coordinates
                       and a "period" label ("pre"/"post"/"other"),
        last_period -- rows of ``df`` inside ``last_period_range``,
        sum_pivot   -- per-site summed voice/data traffic, pre vs post,
        avg_pivot   -- the same comparison on per-day means.
    """
    # Site reference data: site code is the prefix of "Code_Sector" before "_".
    physical_db = get_physical_db()
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )
    physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
    physical_db = physical_db.drop_duplicates(subset="code")

    # Outer-merge so a site present in only one RAT is kept.
    df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
    df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")

    # Guarantee every traffic column exists before summing.
    for col in [
        "2g_data_trafic",
        "2g_voice_trafic",
        "3g_voice_trafic",
        "3g_data_trafic",
        "lte_data_trafic",
    ]:
        if col not in df:
            df[col] = 0

    # Remember where availability KPIs were genuinely measured: the blanket
    # fillna(0) below must not turn "no measurement" into "0% availability".
    kpi_masks = {}
    for kpi_col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]:
        if kpi_col in df.columns:
            kpi_masks[kpi_col] = df[kpi_col].notna()

    df.fillna(0, inplace=True)

    # Restore NaN on availability KPIs where there was no measurement.
    for kpi_col, mask in kpi_masks.items():
        df.loc[~mask, kpi_col] = np.nan

    df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
    df["total_data_trafic"] = (
        df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
    )
    df = pd.merge(df, physical_db, on=["code"], how="left")

    pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
    post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
    last_period_start, last_period_end = (
        pd.to_datetime(last_period_range[0]),
        pd.to_datetime(last_period_range[1]),
    )

    last_period = df[
        (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
    ]

    def assign_period(x):
        # Label each day; pre wins if the ranges ever overlap.
        if pre_start <= x <= pre_end:
            return "pre"
        if post_start <= x <= post_end:
            return "post"
        return "other"

    df["period"] = df["date"].apply(assign_period)

    comparison = df[df["period"].isin(["pre", "post"])]

    # --- Summed traffic per site, pre vs post -------------------------------
    sum_pivot = (
        comparison.groupby(["code", "period"])[
            ["total_voice_trafic", "total_data_trafic"]
        ]
        .sum()
        .unstack()
    )
    # Flatten the (metric, period) MultiIndex into "metric_period" columns.
    sum_pivot.columns = [f"{metric}_{period}" for metric, period in sum_pivot.columns]
    sum_pivot = sum_pivot.reset_index()

    sum_pivot["total_voice_trafic_diff"] = (
        sum_pivot["total_voice_trafic_post"] - sum_pivot["total_voice_trafic_pre"]
    )
    sum_pivot["total_data_trafic_diff"] = (
        sum_pivot["total_data_trafic_post"] - sum_pivot["total_data_trafic_pre"]
    )

    # NOTE(review): .get(col, 1) guards a *missing column* (scalar fallback),
    # not a zero denominator -- a site with 0 pre-traffic yields inf here.
    # TODO confirm whether inf is acceptable downstream.
    for metric in ["total_voice_trafic", "total_data_trafic"]:
        sum_pivot[f"{metric}_diff_pct"] = (
            (sum_pivot.get(f"{metric}_post", 0) - sum_pivot.get(f"{metric}_pre", 0))
            / sum_pivot.get(f"{metric}_pre", 1)
        ) * 100

    # Fixed column order first, any extra columns appended after.
    sum_order = [
        "code",
        "total_voice_trafic_pre",
        "total_voice_trafic_post",
        "total_voice_trafic_diff",
        "total_voice_trafic_diff_pct",
        "total_data_trafic_pre",
        "total_data_trafic_post",
        "total_data_trafic_diff",
        "total_data_trafic_diff_pct",
    ]
    sum_existing_cols = [col for col in sum_order if col in sum_pivot.columns]
    sum_remaining_cols = [
        col for col in sum_pivot.columns if col not in sum_existing_cols
    ]
    sum_pivot = sum_pivot[sum_existing_cols + sum_remaining_cols]

    # --- Per-day average traffic per site, pre vs post ----------------------
    avg_pivot = (
        comparison.groupby(["code", "period"])[
            ["total_voice_trafic", "total_data_trafic"]
        ]
        .mean()
        .unstack()
    )
    avg_pivot.columns = [f"{metric}_{period}" for metric, period in avg_pivot.columns]
    avg_pivot = avg_pivot.reset_index()

    avg_pivot["total_voice_trafic_diff"] = (
        avg_pivot["total_voice_trafic_post"] - avg_pivot["total_voice_trafic_pre"]
    )
    avg_pivot["total_data_trafic_diff"] = (
        avg_pivot["total_data_trafic_post"] - avg_pivot["total_data_trafic_pre"]
    )

    for metric in ["total_voice_trafic", "total_data_trafic"]:
        avg_pivot[f"{metric}_diff_pct"] = (
            (avg_pivot.get(f"{metric}_post", 0) - avg_pivot.get(f"{metric}_pre", 0))
            / avg_pivot.get(f"{metric}_pre", 1)
        ) * 100

    # Rename to avg_* so the two pivots are distinguishable in the export.
    avg_pivot = avg_pivot.rename(
        columns={
            "total_voice_trafic_pre": "avg_voice_trafic_pre",
            "total_voice_trafic_post": "avg_voice_trafic_post",
            "total_voice_trafic_diff": "avg_voice_trafic_diff",
            "total_voice_trafic_diff_pct": "avg_voice_trafic_diff_pct",
            "total_data_trafic_pre": "avg_data_trafic_pre",
            "total_data_trafic_post": "avg_data_trafic_post",
            "total_data_trafic_diff": "avg_data_trafic_diff",
            "total_data_trafic_diff_pct": "avg_data_trafic_diff_pct",
        }
    )

    avg_order = [
        "code",
        "avg_voice_trafic_pre",
        "avg_voice_trafic_post",
        "avg_voice_trafic_diff",
        "avg_voice_trafic_diff_pct",
        "avg_data_trafic_pre",
        "avg_data_trafic_post",
        "avg_data_trafic_diff",
        "avg_data_trafic_diff_pct",
    ]
    avg_existing_cols = [col for col in avg_order if col in avg_pivot.columns]
    avg_remaining_cols = [
        col for col in avg_pivot.columns if col not in avg_existing_cols
    ]
    avg_pivot = avg_pivot[avg_existing_cols + avg_remaining_cols]

    return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def _availability_report(df: pd.DataFrame, avail_col: str, out_prefix: str, sla: float):
    """Shared pre/post availability analysis for one RAT.

    The three public ``analyze_*_availability`` functions were identical
    except for column names; this helper centralises the logic.

    Parameters
    ----------
    df : merged daily frame; must carry ``avail_col`` and a "period" column
        labelled "pre"/"post" for the rows to analyse.
    avail_col : raw availability column (e.g. "2g_tch_avail").
    out_prefix : prefix of the per-site output columns, producing
        ``{out_prefix}_pre`` / ``{out_prefix}_post`` / ``{out_prefix}_diff``.
    sla : availability threshold, same unit as ``avail_col``.

    Returns
    -------
    (summary_df, site_pivot), or (None, None) when the required columns are
    missing or no pre/post rows carry a measurement.
    """
    if avail_col not in df.columns or "period" not in df.columns:
        return None, None

    rat_df = df[df[avail_col].notna()].copy()
    rat_df = rat_df[rat_df["period"].isin(["pre", "post"])]
    if rat_df.empty:
        return None, None

    pre_col = f"{out_prefix}_pre"
    post_col = f"{out_prefix}_post"

    # One row per site, mean availability per period.
    site_pivot = rat_df.groupby(["code", "period"])[avail_col].mean().unstack()
    site_pivot = site_pivot.rename(columns={"pre": pre_col, "post": post_col})

    # Guarantee both period columns exist even if one period has no data.
    for col in (pre_col, post_col):
        if col not in site_pivot.columns:
            site_pivot[col] = pd.NA

    site_pivot[f"{out_prefix}_diff"] = site_pivot[post_col] - site_pivot[pre_col]
    site_pivot["pre_ok_vs_sla"] = site_pivot[pre_col] >= sla
    site_pivot["post_ok_vs_sla"] = site_pivot[post_col] >= sla
    site_pivot = site_pivot.reset_index()

    summary_rows = []
    for period_label, col_name in [("pre", pre_col), ("post", post_col)]:
        series = site_pivot[col_name].dropna()
        total_cells = series.shape[0]
        if total_cells == 0:
            # Emit an all-NA row so the summary always has pre and post rows.
            summary_rows.append(
                {
                    "period": period_label,
                    "cells": 0,
                    "avg_availability": pd.NA,
                    "median_availability": pd.NA,
                    "p05_availability": pd.NA,
                    "p95_availability": pd.NA,
                    "min_availability": pd.NA,
                    "max_availability": pd.NA,
                    "cells_ge_sla": 0,
                    "cells_lt_sla": 0,
                    "pct_cells_ge_sla": pd.NA,
                }
            )
            continue
        cells_ge_sla = (series >= sla).sum()
        summary_rows.append(
            {
                "period": period_label,
                "cells": int(total_cells),
                "avg_availability": series.mean(),
                "median_availability": series.median(),
                "p05_availability": series.quantile(0.05),
                "p95_availability": series.quantile(0.95),
                "min_availability": series.min(),
                "max_availability": series.max(),
                "cells_ge_sla": int(cells_ge_sla),
                "cells_lt_sla": int((series < sla).sum()),
                "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
            }
        )

    return pd.DataFrame(summary_rows), site_pivot


def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
    """Pre/post 2G TCH availability summary and per-site pivot.

    Thin wrapper over the shared availability report; returns
    (summary_df, site_pivot) or (None, None) when no 2G data exists.
    """
    return _availability_report(df, "2g_tch_avail", "tch_avail", sla_2g)


def analyze_3g_availability(df: pd.DataFrame, sla_3g: float):
    """Pre/post 3G cell availability summary and per-site pivot.

    Thin wrapper over the shared availability report; returns
    (summary_df, site_pivot) or (None, None) when no 3G data exists.
    """
    return _availability_report(df, "3g_cell_avail", "cell_avail", sla_3g)


def analyze_lte_availability(df: pd.DataFrame, sla_lte: float):
    """Pre/post LTE cell availability summary and per-site pivot.

    Thin wrapper over the shared availability report; returns
    (summary_df, site_pivot) or (None, None) when no LTE data exists.
    """
    return _availability_report(df, "lte_cell_avail", "lte_avail", sla_lte)
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
def analyze_multirat_availability(
    df: pd.DataFrame, sla_2g: float, sla_3g: float, sla_lte: float
):
    """Build a per-site multi-RAT availability overview (pre vs post).

    For each site code, compute the mean pre/post availability of every RAT
    present in ``df``, flag the post values against their SLA, attach the
    post-period traffic totals and city, and classify each site as
    "OK all RAT" / "Degraded <RAT> only" / "Degraded multi-RAT (...)" /
    "No RAT data".

    Returns the per-site frame, or None when ``df`` lacks a "period" column
    or carries no availability column at all.
    """
    if "period" not in df.columns:
        return None

    rat_cols = []
    if "2g_tch_avail" in df.columns:
        rat_cols.append("2g_tch_avail")
    if "3g_cell_avail" in df.columns:
        rat_cols.append("3g_cell_avail")
    if "lte_cell_avail" in df.columns:
        rat_cols.append("lte_cell_avail")

    if not rat_cols:
        return None

    agg_dict = {col: "mean" for col in rat_cols}

    df_pre = df[df["period"] == "pre"]
    df_post = df[df["period"] == "post"]

    pre = df_pre.groupby("code", as_index=False).agg(agg_dict)
    post = df_post.groupby("code", as_index=False).agg(agg_dict)

    rename_map_pre = {
        "2g_tch_avail": "2g_avail_pre",
        "3g_cell_avail": "3g_avail_pre",
        "lte_cell_avail": "lte_avail_pre",
    }
    rename_map_post = {
        "2g_tch_avail": "2g_avail_post",
        "3g_cell_avail": "3g_avail_post",
        "lte_cell_avail": "lte_avail_post",
    }

    pre = pre.rename(columns=rename_map_pre)
    post = post.rename(columns=rename_map_post)

    # Outer merge keeps sites seen in only one of the two periods.
    multi = pd.merge(pre, post, on="code", how="outer")

    # Post-period traffic totals, used downstream to rank degraded sites.
    if not df_post.empty and {
        "total_voice_trafic",
        "total_data_trafic",
    }.issubset(df_post.columns):
        post_traffic = (
            df_post.groupby("code", as_index=False)[
                ["total_voice_trafic", "total_data_trafic"]
            ]
            .sum()
            .rename(
                columns={
                    "total_voice_trafic": "post_total_voice_trafic",
                    "total_data_trafic": "post_total_data_trafic",
                }
            )
        )
        multi = pd.merge(multi, post_traffic, on="code", how="left")

    if "City" in df.columns:
        city_df = df[["code", "City"]].drop_duplicates("code")
        multi = pd.merge(multi, city_df, on="code", how="left")

    def _ok_flag(col: str, sla: float) -> pd.Series:
        # True/False vs the SLA, but keep NA where no measurement exists so
        # "no data" is never counted as degraded.
        if col not in multi.columns:
            return pd.Series([pd.NA] * len(multi), index=multi.index)
        ok = multi[col] >= sla
        return ok.where(multi[col].notna(), pd.NA)

    if "2g_avail_post" in multi.columns:
        multi["ok_2g_post"] = _ok_flag("2g_avail_post", sla_2g)
    if "3g_avail_post" in multi.columns:
        multi["ok_3g_post"] = _ok_flag("3g_avail_post", sla_3g)
    if "lte_avail_post" in multi.columns:
        multi["ok_lte_post"] = _ok_flag("lte_avail_post", sla_lte)

    def classify_row(row):
        # Collect (RAT, ok?) pairs for RATs that actually have a post value.
        rats_status = []
        for rat, col in [
            ("2G", "ok_2g_post"),
            ("3G", "ok_3g_post"),
            ("LTE", "ok_lte_post"),
        ]:
            if col in row and not pd.isna(row[col]):
                rats_status.append((rat, bool(row[col])))

        if not rats_status:
            return "No RAT data"

        bad_rats = [rat for rat, ok in rats_status if not ok]
        if not bad_rats:
            return "OK all RAT"
        if len(bad_rats) == 1:
            return f"Degraded {bad_rats[0]} only"
        return "Degraded multi-RAT (" + ",".join(bad_rats) + ")"

    multi["post_multirat_status"] = multi.apply(classify_row, axis=1)

    # Present a stable column order; keep any extras at the end.
    ordered_cols = ["code"]
    if "City" in multi.columns:
        ordered_cols.append("City")
    for col in [
        "2g_avail_pre",
        "2g_avail_post",
        "3g_avail_pre",
        "3g_avail_post",
        "lte_avail_pre",
        "lte_avail_post",
        "post_total_voice_trafic",
        "post_total_data_trafic",
        "ok_2g_post",
        "ok_3g_post",
        "ok_lte_post",
        "post_multirat_status",
    ]:
        if col in multi.columns:
            ordered_cols.append(col)

    remaining_cols = [c for c in multi.columns if c not in ordered_cols]
    multi = multi[ordered_cols + remaining_cols]

    return multi
|
| 673 |
+
|
| 674 |
+
|
| 675 |
+
def analyze_persistent_availability(
    df: pd.DataFrame,
    multi_rat_df: pd.DataFrame,
    sla_2g: float,
    sla_3g: float,
    sla_lte: float,
    min_consecutive_days: int = 3,
) -> pd.DataFrame:
    """Find sites whose availability stays below SLA for consecutive days.

    For every RAT column present in ``df``, the daily mean availability per
    site is compared against that RAT's SLA; a site is a "persistent issue"
    on a RAT when it stays below SLA for at least ``min_consecutive_days``
    calendar-consecutive days.  The result keeps only sites with a
    persistent issue on any RAT, enriched from ``multi_rat_df`` (city,
    post-period traffic, multi-RAT status) and ranked by a criticity score.

    Returns an empty DataFrame when there is nothing to analyse.
    """
    if df is None or df.empty:
        return pd.DataFrame()
    if "date" not in df.columns or "code" not in df.columns:
        return pd.DataFrame()

    work_df = df.copy()
    # Collapse timestamps to calendar days for streak counting.
    work_df["date_only"] = work_df["date"].dt.date

    # code -> {max_streak_*/below_days_* per RAT}, filled lazily per RAT.
    site_stats = {}

    def _update_stats(rat_key_prefix: str, grouped: pd.DataFrame, sla: float) -> None:
        # grouped: one row per (code, date_only) with the daily mean in "value".
        if grouped.empty:
            return
        for code, group in grouped.groupby("code"):
            group = group.sort_values("date_only")
            dates = pd.to_datetime(group["date_only"]).tolist()
            below_flags = (group["value"] < sla).tolist()
            max_streak = 0
            current_streak = 0
            total_below = 0
            last_date = None
            for flag, current_date in zip(below_flags, dates):
                if flag:
                    total_below += 1
                    # Extend the streak only across calendar-consecutive
                    # days; any gap (or a day above SLA) restarts it.
                    if (
                        last_date is not None
                        and current_date == last_date + timedelta(days=1)
                        and current_streak > 0
                    ):
                        current_streak += 1
                    else:
                        current_streak = 1
                    if current_streak > max_streak:
                        max_streak = current_streak
                else:
                    current_streak = 0
                last_date = current_date
            stats = site_stats.setdefault(
                code,
                {
                    "code": code,
                    "max_streak_2g": 0,
                    "max_streak_3g": 0,
                    "max_streak_lte": 0,
                    "below_days_2g": 0,
                    "below_days_3g": 0,
                    "below_days_lte": 0,
                },
            )
            stats[f"max_streak_{rat_key_prefix}"] = max_streak
            stats[f"below_days_{rat_key_prefix}"] = total_below

    for rat_col, rat_key, sla in [
        ("2g_tch_avail", "2g", sla_2g),
        ("3g_cell_avail", "3g", sla_3g),
        ("lte_cell_avail", "lte", sla_lte),
    ]:
        if rat_col in work_df.columns:
            # Daily mean per site, ignoring days with no measurement.
            g = (
                work_df.dropna(subset=[rat_col])
                .groupby(["code", "date_only"])[rat_col]
                .mean()
                .reset_index()
            )
            g = g.rename(columns={rat_col: "value"})
            _update_stats(rat_key, g, sla)

    if not site_stats:
        return pd.DataFrame()

    rows = []
    for code, s in site_stats.items():
        max_2g = s.get("max_streak_2g", 0)
        max_3g = s.get("max_streak_3g", 0)
        max_lte = s.get("max_streak_lte", 0)
        below_2g = s.get("below_days_2g", 0)
        below_3g = s.get("below_days_3g", 0)
        below_lte = s.get("below_days_lte", 0)
        persistent_2g = max_2g >= min_consecutive_days if max_2g else False
        persistent_3g = max_3g >= min_consecutive_days if max_3g else False
        persistent_lte = max_lte >= min_consecutive_days if max_lte else False
        total_below_any = below_2g + below_3g + below_lte
        persistent_any = persistent_2g or persistent_3g or persistent_lte
        rats_persistent_count = sum(
            [persistent_2g is True, persistent_3g is True, persistent_lte is True]
        )
        rows.append(
            {
                "code": code,
                "persistent_issue_2g": persistent_2g,
                "persistent_issue_3g": persistent_3g,
                "persistent_issue_lte": persistent_lte,
                "max_consecutive_days_2g": max_2g,
                "max_consecutive_days_3g": max_3g,
                "max_consecutive_days_lte": max_lte,
                "total_below_days_2g": below_2g,
                "total_below_days_3g": below_3g,
                "total_below_days_lte": below_lte,
                "total_below_days_any": total_below_any,
                "persistent_issue_any": persistent_any,
                "persistent_rats_count": rats_persistent_count,
            }
        )

    result = pd.DataFrame(rows)
    # Keep only sites with a persistent issue on at least one RAT.
    result = result[result["persistent_issue_any"] == True]
    if result.empty:
        return result

    # Enrich with context from the multi-RAT overview when available.
    if multi_rat_df is not None and not multi_rat_df.empty:
        cols_to_merge = [
            c
            for c in [
                "code",
                "City",
                "post_total_voice_trafic",
                "post_total_data_trafic",
                "post_multirat_status",
            ]
            if c in multi_rat_df.columns
        ]
        if cols_to_merge:
            result = pd.merge(
                result,
                multi_rat_df[cols_to_merge].drop_duplicates("code"),
                on="code",
                how="left",
            )

    if "post_total_data_trafic" not in result.columns:
        result["post_total_data_trafic"] = 0.0

    # Weighted ranking: affected RATs dominate, then below-SLA days, then
    # the amount of post-period data traffic at stake.
    result["criticity_score"] = (
        result["post_total_data_trafic"].fillna(0) * 1.0
        + result["total_below_days_any"].fillna(0) * 100.0
        + result["persistent_rats_count"].fillna(0) * 1000.0
    )

    result = result.sort_values(
        by=["criticity_score", "total_below_days_any"], ascending=[False, False]
    )

    return result
|
| 826 |
+
|
| 827 |
+
|
| 828 |
+
def monthly_data_analysis(df: pd.DataFrame):
    """Build per-site monthly traffic pivots for voice and data.

    Parameters
    ----------
    df : pd.DataFrame
        Merged daily dataset with at least ``code``, ``date``,
        ``total_voice_trafic`` and ``total_data_trafic`` columns.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame]
        ``(voice_pivot, data_pivot)`` — one row per site ``code``, one
        column per ``YYYY-MM`` month, summed traffic, missing months as 0.

    Note: mutates *df* in place — ``date`` is coerced to datetime and a
    ``month_year`` column is added (downstream views rely on the coercion).
    """
    df["date"] = pd.to_datetime(df["date"])
    df["month_year"] = df["date"].dt.to_period("M").astype(str)

    def _monthly_pivot(value_col: str) -> pd.DataFrame:
        # One row per site, one column per month, traffic summed per cell.
        pivot = df.pivot_table(
            index="code",
            columns="month_year",
            values=value_col,
            aggfunc="sum",
            fill_value=0,
        )
        # "YYYY-MM" strings sort lexicographically == chronologically.
        return pivot.reindex(sorted(pivot.columns), axis=1)

    return _monthly_pivot("total_voice_trafic"), _monthly_pivot("total_data_trafic")
|
| 851 |
+
|
| 852 |
+
|
| 853 |
+
# --------------------------------------------------------------------------------------
# Global state for drill-down views & export
# --------------------------------------------------------------------------------------
# All of these are populated by run_analysis() and read by the _update_* view
# callbacks and the export builder. They stay None until the first successful run.

# Merged per-day dataset for the whole window, plus the "last period" slice.
current_full_df: pd.DataFrame | None = None
current_last_period_df: pd.DataFrame | None = None
# Working copies driving the drill-down views (currently aliases of the above).
current_analysis_df: pd.DataFrame | None = None
current_analysis_last_period_df: pd.DataFrame | None = None

# Multi-RAT availability table and persistent-issue table shown in the UI.
current_multi_rat_df: pd.DataFrame | None = None
current_persistent_df: pd.DataFrame | None = None

# Per-site availability detail, one table per RAT.
current_site_2g_avail: pd.DataFrame | None = None
current_site_3g_avail: pd.DataFrame | None = None
current_site_lte_avail: pd.DataFrame | None = None

# Aggregated availability summaries, one table per RAT.
current_summary_2g_avail: pd.DataFrame | None = None
current_summary_3g_avail: pd.DataFrame | None = None
current_summary_lte_avail: pd.DataFrame | None = None

# Monthly traffic pivots, pre/post comparison tables, and the combined
# availability summary across RATs used for the export.
current_monthly_voice_df: pd.DataFrame | None = None
current_monthly_data_df: pd.DataFrame | None = None
current_sum_pre_post_df: pd.DataFrame | None = None
current_avg_pre_post_df: pd.DataFrame | None = None
current_availability_summary_all_df: pd.DataFrame | None = None

# Export-specific variants (computed on the full dataset, fixed 3-day rule)
# and the prebuilt Excel payload served by the download button.
current_export_multi_rat_df: pd.DataFrame | None = None
current_export_persistent_df: pd.DataFrame | None = None
current_export_bytes: bytes | None = None
|
| 882 |
+
|
| 883 |
+
|
| 884 |
+
# --------------------------------------------------------------------------------------
# Widgets
# --------------------------------------------------------------------------------------

# Shared Plotly pane config: hide the logo, enable wheel zoom, always show the mode bar.
PLOTLY_CONFIG = {"displaylogo": False, "scrollZoom": True, "displayModeBar": True}

# Raw report uploads, one per RAT (CSV or zipped CSV).
file_2g = pn.widgets.FileInput(name="2G Traffic Report", accept=".csv,.zip")
file_3g = pn.widgets.FileInput(name="3G Traffic Report", accept=".csv,.zip")
file_lte = pn.widgets.FileInput(name="LTE Traffic Report", accept=".csv,.zip")

# Analysis windows: pre vs post comparison, plus a "last period" focus window.
pre_range = pn.widgets.DateRangePicker(name="Pre-period (from - to)")
post_range = pn.widgets.DateRangePicker(name="Post-period (from - to)")
last_range = pn.widgets.DateRangePicker(name="Last period (from - to)")

# Per-RAT availability SLA thresholds, in percent.
sla_2g = pn.widgets.FloatInput(name="2G TCH availability SLA (%)", value=98.0, step=0.1)
sla_3g = pn.widgets.FloatInput(
    name="3G Cell availability SLA (%)", value=98.0, step=0.1
)
sla_lte = pn.widgets.FloatInput(
    name="LTE Cell availability SLA (%)", value=98.0, step=0.1
)

number_of_top_trafic_sites = pn.widgets.IntInput(
    name="Number of top traffic sites", value=25
)

# UI threshold for the persistent-issue view (the export uses a fixed 3 days).
min_persistent_days_widget = pn.widgets.IntInput(
    name="Minimum consecutive days below SLA to flag persistent issue",
    value=3,
)

top_critical_n_widget = pn.widgets.IntInput(
    name="Number of top critical sites to display", value=25
)

# Main action button; run_analysis is attached via on_click further below.
run_button = pn.widgets.Button(name="Run analysis", button_type="primary")

# Status banner; run_analysis flips alert_type to success/danger as it goes.
status_pane = pn.pane.Alert(
    "Upload the 3 reports, select the 3 periods and click 'Run analysis'",
    alert_type="primary",
)

# Result tables — run_analysis assigns their .value after each run.
summary_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)

sum_pre_post_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
summary_2g_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
worst_2g_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
summary_3g_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
worst_3g_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
summary_lte_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
worst_lte_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
multi_rat_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
persistent_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
|
| 977 |
+
|
| 978 |
+
# Site drill-down: searchable selector (label -> site code mapping) plus
# traffic/availability plots and a degraded-days table.
site_select = pn.widgets.AutocompleteInput(
    name="Select a site for detailed view (Type to search)",
    options={},
    case_sensitive=False,
    search_strategy="includes",
    restrict=True,
    placeholder="Type site code or city...",
)
site_traffic_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-site-traffic"],
)
# Each Plotly pane is wrapped in a Column carrying css_classes that are
# presumably targeted by the client-side fullscreen JS — TODO confirm.
site_traffic_plot = pn.Column(
    site_traffic_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "site-traffic-wrapper"],
)
site_avail_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-site-avail"],
)
site_avail_plot = pn.Column(
    site_avail_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "site-avail-wrapper"],
)
site_degraded_table = pn.widgets.Tabulator(
    height=200,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)

# City drill-down: same layout as the site view, aggregated per City.
city_select = pn.widgets.AutocompleteInput(
    name="Select a City for aggregated view (Type to search)",
    options=[],
    case_sensitive=False,
    search_strategy="includes",
    restrict=True,
    placeholder="Type city name...",
)
city_traffic_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-city-traffic"],
)
city_traffic_plot = pn.Column(
    city_traffic_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "city-traffic-wrapper"],
)
city_avail_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-city-avail"],
)
city_avail_plot = pn.Column(
    city_avail_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "city-avail-wrapper"],
)
city_degraded_table = pn.widgets.Tabulator(
    height=200,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)

# Network-wide daily availability plot and its degraded-days table.
daily_avail_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-daily-avail"],
)
daily_avail_plot = pn.Column(
    daily_avail_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "daily-avail-wrapper"],
)
daily_degraded_table = pn.widgets.Tabulator(
    height=200,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)

# Top-traffic sites: tables, bar charts and geographic maps.
top_data_sites_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
top_voice_sites_table = pn.widgets.Tabulator(
    height=250,
    sizing_mode="stretch_width",
    layout="fit_data_table",
)
top_data_bar_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-top-data"],
)
top_data_bar_plot = pn.Column(
    top_data_bar_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "top-data-bar-wrapper"],
)
top_voice_bar_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-top-voice"],
)
top_voice_bar_plot = pn.Column(
    top_voice_bar_plot_pane,
    height=400,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "top-voice-bar-wrapper"],
)
data_map_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-data-map"],
)
data_map_plot = pn.Column(
    data_map_plot_pane,
    height=500,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "data-map-wrapper"],
)
voice_map_plot_pane = pn.pane.Plotly(
    sizing_mode="stretch_both",
    config=PLOTLY_CONFIG,
    css_classes=["fullscreen-target-voice-map"],
)
voice_map_plot = pn.Column(
    voice_map_plot_pane,
    height=500,
    sizing_mode="stretch_width",
    css_classes=["plot-fullscreen-wrapper", "voice-map-wrapper"],
)
|
| 1121 |
+
|
| 1122 |
+
# Fullscreen helper logic has been replaced by client-side JS.

# Fullscreen buttons for each Plotly plot
# (click handling is not visible in this section — presumably wired to the
# client-side JS elsewhere in the module; TODO confirm).
site_traffic_fullscreen_btn = pn.widgets.Button(
    name="Full screen site traffic", button_type="default"
)
site_avail_fullscreen_btn = pn.widgets.Button(
    name="Full screen site availability", button_type="default"
)
city_traffic_fullscreen_btn = pn.widgets.Button(
    name="Full screen city traffic", button_type="default"
)
city_avail_fullscreen_btn = pn.widgets.Button(
    name="Full screen city availability", button_type="default"
)
daily_avail_fullscreen_btn = pn.widgets.Button(
    name="Full screen daily availability", button_type="default"
)
top_data_fullscreen_btn = pn.widgets.Button(
    name="Full screen top data bar", button_type="default"
)
top_voice_fullscreen_btn = pn.widgets.Button(
    name="Full screen top voice bar", button_type="default"
)
data_map_fullscreen_btn = pn.widgets.Button(
    name="Full screen data map", button_type="default"
)
voice_map_fullscreen_btn = pn.widgets.Button(
    name="Full screen voice map", button_type="default"
)

# CSV download buttons (payload callbacks are attached elsewhere — TODO confirm).
multi_rat_download = pn.widgets.FileDownload(
    label="Download Multi-RAT table (CSV)",
    filename="multi_rat_availability.csv",
    button_type="default",
)

persistent_download = pn.widgets.FileDownload(
    label="Download persistent issues (CSV)",
    filename="persistent_issues.csv",
    button_type="default",
)

top_data_download = pn.widgets.FileDownload(
    label="Download top data sites (CSV)",
    filename="top_data_sites.csv",
    button_type="default",
)

top_voice_download = pn.widgets.FileDownload(
    label="Download top voice sites (CSV)",
    filename="top_voice_sites.csv",
    button_type="default",
)

# Excel export; run_analysis stamps the filename with a timestamp after each run.
export_button = pn.widgets.FileDownload(
    label="Download the Analysis Report",
    filename="Global_Trafic_Analysis_Report.xlsx",
    button_type="primary",
)
|
| 1182 |
+
|
| 1183 |
+
|
| 1184 |
+
# --------------------------------------------------------------------------------------
|
| 1185 |
+
# Callback
|
| 1186 |
+
# --------------------------------------------------------------------------------------
|
| 1187 |
+
|
| 1188 |
+
|
| 1189 |
+
def _validate_date_range(rng: tuple[date, date] | list[date], label: str) -> None:
|
| 1190 |
+
if not rng or len(rng) != 2:
|
| 1191 |
+
raise ValueError(f"Please select 2 dates for {label}.")
|
| 1192 |
+
if rng[0] is None or rng[1] is None:
|
| 1193 |
+
raise ValueError(f"Please select valid dates for {label}.")
|
| 1194 |
+
|
| 1195 |
+
|
| 1196 |
+
def run_analysis(event=None):  # event param required by on_click
    """Full analysis pipeline, triggered by the "Run analysis" button.

    Reads the three uploaded reports, validates the selected periods,
    preprocesses and merges the per-RAT data, computes availability /
    multi-RAT / persistent-issue tables, stores everything in the module
    globals, precomputes the Excel export, and refreshes all views.
    Any exception is caught and surfaced in status_pane.
    """
    try:
        status_pane.object = "Running analysis..."
        status_pane.alert_type = "primary"

        # Everything below writes module-level state consumed by the
        # drill-down callbacks and the export builder.
        global current_full_df, current_last_period_df
        global current_analysis_df, current_analysis_last_period_df
        global current_multi_rat_df, current_persistent_df
        global current_site_2g_avail, current_site_3g_avail, current_site_lte_avail
        global \
            current_summary_2g_avail, \
            current_summary_3g_avail, \
            current_summary_lte_avail
        global current_monthly_voice_df, current_monthly_data_df
        global current_sum_pre_post_df, current_avg_pre_post_df
        global current_availability_summary_all_df
        global current_export_multi_rat_df, current_export_persistent_df
        global current_export_bytes

        # Basic validations
        if not (file_2g.value and file_3g.value and file_lte.value):
            raise ValueError("Please upload all 3 traffic reports (2G, 3G, LTE).")

        _validate_date_range(pre_range.value, "pre-period")
        _validate_date_range(post_range.value, "post-period")
        _validate_date_range(last_range.value, "last period")

        # Simple check on overlapping pre/post (same logic as Streamlit version, but lighter)
        # NOTE: this only rejects identical ranges and pre fully containing post;
        # partial overlaps are allowed as-is.
        pre_start, pre_end = pre_range.value
        post_start, post_end = post_range.value
        if pre_start == post_start and pre_end == post_end:
            raise ValueError("Pre and post periods are the same.")
        if pre_start < post_start and pre_end > post_end:
            raise ValueError("Pre and post periods are overlapping.")

        df_2g = read_fileinput_to_df(file_2g)
        df_3g = read_fileinput_to_df(file_3g)
        df_lte = read_fileinput_to_df(file_lte)

        if df_2g is None or df_3g is None or df_lte is None:
            raise ValueError("Failed to read one or more input files.")

        # Quick shape overview of the three raw inputs.
        summary = pd.DataFrame(
            {
                "Dataset": ["2G", "3G", "LTE"],
                "Rows": [len(df_2g), len(df_3g), len(df_lte)],
                "Columns": [df_2g.shape[1], df_3g.shape[1], df_lte.shape[1]],
            }
        )
        summary_table.value = summary

        df_2g_clean = preprocess_2g(df_2g)
        df_3g_clean = preprocess_3g(df_3g)
        df_lte_clean = preprocess_lte(df_lte)

        full_df, last_period, sum_pre_post_analysis, avg_pre_post_analysis = (
            merge_and_compare(
                df_2g_clean,
                df_3g_clean,
                df_lte_clean,
                pre_range.value,
                post_range.value,
                last_range.value,
            )
        )

        # Also coerces full_df["date"] to datetime as a side effect.
        monthly_voice_df, monthly_data_df = monthly_data_analysis(full_df)

        analysis_df = full_df

        # Persist global state for later drill-down / export
        current_full_df = full_df
        current_last_period_df = last_period
        current_analysis_df = analysis_df
        current_analysis_last_period_df = last_period
        current_monthly_voice_df = monthly_voice_df
        current_monthly_data_df = monthly_data_df
        current_sum_pre_post_df = sum_pre_post_analysis
        current_avg_pre_post_df = avg_pre_post_analysis

        sum_pre_post_table.value = sum_pre_post_analysis

        # Per-RAT availability: summary table + 25 worst sites by post-period value.
        summary_2g_avail, site_2g_avail = analyze_2g_availability(
            analysis_df, float(sla_2g.value)
        )
        if summary_2g_avail is not None:
            summary_2g_table.value = summary_2g_avail.round(2)
            worst_sites_2g = site_2g_avail.sort_values("tch_avail_post").head(25)
            worst_2g_table.value = worst_sites_2g.round(2)
        else:
            summary_2g_table.value = pd.DataFrame()
            worst_2g_table.value = pd.DataFrame()

        current_summary_2g_avail = summary_2g_avail
        current_site_2g_avail = site_2g_avail if summary_2g_avail is not None else None

        summary_3g_avail, site_3g_avail = analyze_3g_availability(
            analysis_df, float(sla_3g.value)
        )
        if summary_3g_avail is not None:
            summary_3g_table.value = summary_3g_avail.round(2)
            worst_sites_3g = site_3g_avail.sort_values("cell_avail_post").head(25)
            worst_3g_table.value = worst_sites_3g.round(2)
        else:
            summary_3g_table.value = pd.DataFrame()
            worst_3g_table.value = pd.DataFrame()

        current_summary_3g_avail = summary_3g_avail
        current_site_3g_avail = site_3g_avail if summary_3g_avail is not None else None

        summary_lte_avail, site_lte_avail = analyze_lte_availability(
            analysis_df, float(sla_lte.value)
        )
        if summary_lte_avail is not None:
            summary_lte_table.value = summary_lte_avail.round(2)
            worst_sites_lte = site_lte_avail.sort_values("lte_avail_post").head(25)
            worst_lte_table.value = worst_sites_lte.round(2)
        else:
            summary_lte_table.value = pd.DataFrame()
            worst_lte_table.value = pd.DataFrame()

        current_summary_lte_avail = summary_lte_avail
        current_site_lte_avail = (
            site_lte_avail if summary_lte_avail is not None else None
        )

        # Build availability summary across RATs for export
        availability_frames = []
        if summary_2g_avail is not None:
            tmp = summary_2g_avail.copy()
            tmp["RAT"] = "2G"
            availability_frames.append(tmp)
        if summary_3g_avail is not None:
            tmp = summary_3g_avail.copy()
            tmp["RAT"] = "3G"
            availability_frames.append(tmp)
        if summary_lte_avail is not None:
            tmp = summary_lte_avail.copy()
            tmp["RAT"] = "LTE"
            availability_frames.append(tmp)

        current_availability_summary_all_df = (
            pd.concat(availability_frames, ignore_index=True)
            if availability_frames
            else pd.DataFrame()
        )

        multi_rat_df = analyze_multirat_availability(
            analysis_df,
            float(sla_2g.value),
            float(sla_3g.value),
            float(sla_lte.value),
        )
        if multi_rat_df is not None:
            multi_rat_table.value = multi_rat_df.round(2)
        else:
            multi_rat_table.value = pd.DataFrame()

        current_multi_rat_df = multi_rat_df if multi_rat_df is not None else None

        # Persistent availability (UI uses configurable threshold, export keeps 3 days)
        persistent_df = pd.DataFrame()
        if multi_rat_df is not None:
            persistent_df = analyze_persistent_availability(
                analysis_df,
                multi_rat_df,
                float(sla_2g.value),
                float(sla_3g.value),
                float(sla_lte.value),
                int(min_persistent_days_widget.value),
            )

        current_persistent_df = (
            persistent_df
            if persistent_df is not None and not persistent_df.empty
            else None
        )

        # Export-specific multi-RAT & persistent (based on full_df as in Streamlit app)
        export_multi_rat_base = analyze_multirat_availability(
            full_df,
            float(sla_2g.value),
            float(sla_3g.value),
            float(sla_lte.value),
        )
        current_export_multi_rat_df = (
            export_multi_rat_base
            if export_multi_rat_base is not None
            else pd.DataFrame()
        )

        export_persistent_tmp = pd.DataFrame()
        if export_multi_rat_base is not None:
            export_persistent_tmp = analyze_persistent_availability(
                full_df,
                export_multi_rat_base,
                float(sla_2g.value),
                float(sla_3g.value),
                float(sla_lte.value),
                3,
            )
        current_export_persistent_df = (
            export_persistent_tmp
            if export_persistent_tmp is not None and not export_persistent_tmp.empty
            else pd.DataFrame()
        )

        # Precompute export bytes so the download button is instant
        current_export_bytes = _build_export_bytes()

        # Update export filename with timestamp for clarity
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        export_button.filename = f"Global_Trafic_Analysis_Report_{timestamp}.xlsx"

        # Update all drill-down & map views
        _update_site_controls()
        _update_city_controls()
        _update_daily_availability_view()
        _update_top_sites_and_maps()
        _update_persistent_table_view()

        status_pane.alert_type = "success"
        status_pane.object = "Analysis completed."

    except Exception as exc:  # noqa: BLE001
        # Surface any failure in the status banner instead of crashing the app.
        status_pane.alert_type = "danger"
        status_pane.object = f"Error: {exc}"
|
| 1423 |
+
|
| 1424 |
+
|
| 1425 |
+
# Wire the main action button to the analysis pipeline.
run_button.on_click(run_analysis)
|
| 1426 |
+
|
| 1427 |
+
|
| 1428 |
+
def _update_site_controls() -> None:
    """Refresh the site selector from current_analysis_df, then redraw the site view."""
    if current_analysis_df is None or current_analysis_df.empty:
        # No data: clear the selector and every site-level output.
        site_select.options = {}
        site_select.value = None
        site_traffic_plot_pane.object = None
        site_avail_plot_pane.object = None
        site_degraded_table.value = pd.DataFrame()
        return

    unique_sites = (
        current_analysis_df[["code", "City"]]
        .drop_duplicates()
        .sort_values(by=["City", "code"])
    )

    # Map a human-readable "City_code" label (or bare code when City is
    # missing) to the numeric site code.
    labels: dict[str, int] = {}
    for _, site in unique_sites.iterrows():
        code_value = int(site["code"])
        if pd.notna(site["City"]):
            labels[f"{site['City']}_{site['code']}"] = code_value
        else:
            labels[str(site["code"])] = code_value

    site_select.options = labels
    # When options is a dict, the widget value is the mapped value (the code).
    if labels and site_select.value not in labels.values():
        site_select.value = next(iter(labels.values()))

    _update_site_view()
|
| 1459 |
+
|
| 1460 |
+
|
| 1461 |
+
def _update_site_view(event=None) -> None:  # noqa: D401, ARG001
    """Update site drill-down plots and table from current_analysis_df and site_select.

    Renders (for the selected site code):
    - a traffic-evolution line chart (voice/data),
    - an availability-per-RAT line chart,
    - a table of days where the daily mean availability fell below the SLA.
    Clears all three outputs when there is no data or no selection.
    """
    if current_analysis_df is None or current_analysis_df.empty:
        site_traffic_plot_pane.object = None
        site_avail_plot_pane.object = None
        site_degraded_table.value = pd.DataFrame()
        return

    selected_code = site_select.value
    if selected_code is None:
        site_traffic_plot_pane.object = None
        site_avail_plot_pane.object = None
        site_degraded_table.value = pd.DataFrame()
        return

    # Work on a copy: this function adds a "date_only" helper column below.
    site_detail_df = current_analysis_df[
        current_analysis_df["code"] == int(selected_code)
    ].copy()
    if site_detail_df.empty:
        site_traffic_plot_pane.object = None
        site_avail_plot_pane.object = None
        site_degraded_table.value = pd.DataFrame()
        return

    site_detail_df = site_detail_df.sort_values("date")

    # Traffic over time
    traffic_cols = [
        col
        for col in ["total_voice_trafic", "total_data_trafic"]
        if col in site_detail_df.columns
    ]
    # Plot title label: "code" or "code (City)" when the city is known.
    first_row = site_detail_df.iloc[0]
    site_label = f"{first_row['code']}"
    if pd.notna(first_row.get("City")):
        site_label += f" ({first_row['City']})"

    if traffic_cols:
        # Long format so plotly draws one colored line per metric.
        traffic_long = site_detail_df[["date"] + traffic_cols].melt(
            id_vars="date",
            value_vars=traffic_cols,
            var_name="metric",
            value_name="value",
        )
        fig_traffic = px.line(
            traffic_long,
            x="date",
            y="value",
            color="metric",
            color_discrete_sequence=px.colors.qualitative.Plotly,
        )
        fig_traffic.update_layout(
            title=f"Traffic Evolution - Site: {site_label}",
            template="plotly_white",
            plot_bgcolor="white",
            paper_bgcolor="white",
        )
        site_traffic_plot_pane.object = fig_traffic
    else:
        site_traffic_plot_pane.object = None

    # Availability over time per RAT
    avail_cols: list[str] = []
    rename_map: dict[str, str] = {}
    if "2g_tch_avail" in site_detail_df.columns:
        avail_cols.append("2g_tch_avail")
        rename_map["2g_tch_avail"] = "2G"
    if "3g_cell_avail" in site_detail_df.columns:
        avail_cols.append("3g_cell_avail")
        rename_map["3g_cell_avail"] = "3G"
    if "lte_cell_avail" in site_detail_df.columns:
        avail_cols.append("lte_cell_avail")
        rename_map["lte_cell_avail"] = "LTE"

    if avail_cols:
        avail_df = site_detail_df[["date"] + avail_cols].copy()
        avail_df = avail_df.rename(columns=rename_map)
        value_cols = [c for c in avail_df.columns if c != "date"]
        avail_long = avail_df.melt(
            id_vars="date",
            value_vars=value_cols,
            var_name="RAT",
            value_name="availability",
        )
        fig_avail = px.line(
            avail_long,
            x="date",
            y="availability",
            color="RAT",
            color_discrete_sequence=px.colors.qualitative.Plotly,
        )
        fig_avail.update_layout(
            title=f"Availability vs SLA - Site: {site_label}",
            template="plotly_white",
            plot_bgcolor="white",
            paper_bgcolor="white",
        )
        site_avail_plot_pane.object = fig_avail

        # Days with availability below SLA per RAT
        # (daily mean per calendar day, compared to the widget SLA value).
        site_detail_df["date_only"] = site_detail_df["date"].dt.date
        degraded_rows_site: list[dict] = []
        for rat_col, rat_name, sla_value in [
            ("2g_tch_avail", "2G", float(sla_2g.value)),
            ("3g_cell_avail", "3G", float(sla_3g.value)),
            ("lte_cell_avail", "LTE", float(sla_lte.value)),
        ]:
            if rat_col in site_detail_df.columns:
                daily_site = (
                    site_detail_df.groupby("date_only")[rat_col].mean().dropna()
                )
                mask = daily_site < sla_value
                for d, val in daily_site[mask].items():
                    degraded_rows_site.append(
                        {
                            "RAT": rat_name,
                            "date": d,
                            "avg_availability": val,
                            "SLA": sla_value,
                        }
                    )
        if degraded_rows_site:
            degraded_site_df = pd.DataFrame(degraded_rows_site)
            site_degraded_table.value = degraded_site_df.round(2)
        else:
            site_degraded_table.value = pd.DataFrame()
    else:
        site_avail_plot_pane.object = None
        site_degraded_table.value = pd.DataFrame()
|
| 1590 |
+
|
| 1591 |
+
|
| 1592 |
+
def _update_city_controls() -> None:
    """Populate the city selection widget from ``current_analysis_df`` and refresh the view.

    Clears all city drill-down widgets when no analysis data is loaded, or when the
    data has no usable ``City`` column; otherwise fills ``city_select`` with the
    sorted distinct cities and triggers ``_update_city_view``.
    """

    def _reset_city_widgets() -> None:
        # Common reset path: empty selector, cleared plots, empty degraded table.
        city_select.options = []
        city_select.value = None
        city_traffic_plot_pane.object = None
        city_avail_plot_pane.object = None
        city_degraded_table.value = pd.DataFrame()

    if current_analysis_df is None or current_analysis_df.empty:
        _reset_city_widgets()
        return

    if (
        "City" not in current_analysis_df.columns
        or not current_analysis_df["City"].notna().any()
    ):
        # Bug fix: the original assigned pd.DataFrame() to city_avail_plot_pane.object
        # (a Plotly pane) in this branch, instead of clearing it with None like the
        # other reset branches do.
        _reset_city_widgets()
        return

    cities_df = (
        current_analysis_df[["City"]].dropna().drop_duplicates().sort_values(by="City")
    )
    options = cities_df["City"].tolist()
    city_select.options = options
    if options and city_select.value not in options:
        # Keep the current selection when still valid; otherwise default to first city.
        city_select.value = options[0]

    _update_city_view()
| 1622 |
+
|
| 1623 |
+
|
| 1624 |
+
def _update_city_view(event=None) -> None:  # noqa: D401, ARG001
    """Update city drill-down plots and degraded-days table based on ``city_select``.

    Reads the module-level ``current_analysis_df`` and the SLA widgets; writes
    into ``city_traffic_plot_pane``, ``city_avail_plot_pane`` and
    ``city_degraded_table``. The ``event`` argument is unused (Panel watcher API).
    """
    # Guard: nothing to display without analysis data.
    if current_analysis_df is None or current_analysis_df.empty:
        city_traffic_plot_pane.object = None
        city_avail_plot_pane.object = None
        city_degraded_table.value = pd.DataFrame()
        return

    selected_city = city_select.value
    if not selected_city:
        # No city selected: clear all drill-down widgets.
        city_traffic_plot_pane.object = None
        city_avail_plot_pane.object = None
        city_degraded_table.value = pd.DataFrame()
        return

    city_detail_df = current_analysis_df[
        current_analysis_df["City"] == selected_city
    ].copy()
    if city_detail_df.empty:
        city_traffic_plot_pane.object = None
        city_avail_plot_pane.object = None
        city_degraded_table.value = pd.DataFrame()
        return

    city_detail_df = city_detail_df.sort_values("date")

    # Traffic aggregated at city level (only the traffic columns that exist).
    traffic_cols_city = [
        col
        for col in ["total_voice_trafic", "total_data_trafic"]
        if col in city_detail_df.columns
    ]
    if traffic_cols_city:
        # Sum per date across all sites of the city, then melt to long format
        # so a single px.line call draws one colored line per metric.
        city_traffic = (
            city_detail_df.groupby("date")[traffic_cols_city].sum().reset_index()
        )
        traffic_long_city = city_traffic.melt(
            id_vars="date",
            value_vars=traffic_cols_city,
            var_name="metric",
            value_name="value",
        )
        fig_traffic_city = px.line(
            traffic_long_city,
            x="date",
            y="value",
            color="metric",
            color_discrete_sequence=px.colors.qualitative.Plotly,
        )
        fig_traffic_city.update_layout(
            title=f"Total Traffic Evolution - City: {selected_city}",
            template="plotly_white",
            plot_bgcolor="white",
            paper_bgcolor="white",
        )
        city_traffic_plot_pane.object = fig_traffic_city
    else:
        city_traffic_plot_pane.object = None

    # Availability aggregated at city level: collect whichever per-RAT
    # availability columns are present and map them to short display labels.
    avail_cols_city: list[str] = []
    rename_map_city: dict[str, str] = {}
    if "2g_tch_avail" in city_detail_df.columns:
        avail_cols_city.append("2g_tch_avail")
        rename_map_city["2g_tch_avail"] = "2G"
    if "3g_cell_avail" in city_detail_df.columns:
        avail_cols_city.append("3g_cell_avail")
        rename_map_city["3g_cell_avail"] = "3G"
    if "lte_cell_avail" in city_detail_df.columns:
        avail_cols_city.append("lte_cell_avail")
        rename_map_city["lte_cell_avail"] = "LTE"

    if avail_cols_city:
        avail_city_df = city_detail_df[["date"] + avail_cols_city].copy()
        avail_city_df = avail_city_df.rename(columns=rename_map_city)
        value_cols_city = [c for c in avail_city_df.columns if c != "date"]
        # Long format: one row per (date, RAT) so px.line draws one line per RAT.
        avail_long_city = avail_city_df.melt(
            id_vars="date",
            value_vars=value_cols_city,
            var_name="RAT",
            value_name="availability",
        )
        fig_avail_city = px.line(
            avail_long_city,
            x="date",
            y="availability",
            color="RAT",
            color_discrete_sequence=px.colors.qualitative.Plotly,
        )
        fig_avail_city.update_layout(
            title=f"Availability vs SLA - City: {selected_city}",
            template="plotly_white",
            plot_bgcolor="white",
            paper_bgcolor="white",
        )
        city_avail_plot_pane.object = fig_avail_city

        # Days where the city's daily mean availability falls below the SLA, per RAT.
        city_detail_df["date_only"] = city_detail_df["date"].dt.date
        degraded_rows_city: list[dict] = []
        for rat_col, rat_name, sla_value in [
            ("2g_tch_avail", "2G", float(sla_2g.value)),
            ("3g_cell_avail", "3G", float(sla_3g.value)),
            ("lte_cell_avail", "LTE", float(sla_lte.value)),
        ]:
            if rat_col in city_detail_df.columns:
                daily_city = (
                    city_detail_df.groupby("date_only")[rat_col].mean().dropna()
                )
                mask_city = daily_city < sla_value
                for d, val in daily_city[mask_city].items():
                    degraded_rows_city.append(
                        {
                            "RAT": rat_name,
                            "date": d,
                            "avg_availability": val,
                            "SLA": sla_value,
                        }
                    )
        if degraded_rows_city:
            degraded_city_df = pd.DataFrame(degraded_rows_city)
            city_degraded_table.value = degraded_city_df.round(2)
        else:
            city_degraded_table.value = pd.DataFrame()
    else:
        # No availability columns at all: clear the plot and the table.
        city_avail_plot_pane.object = None
        city_degraded_table.value = pd.DataFrame()
| 1750 |
+
|
| 1751 |
+
|
| 1752 |
+
def _update_daily_availability_view() -> None:
    """Daily average availability per RAT over the full ``current_analysis_df``.

    Writes a multi-line plot into ``daily_avail_plot_pane`` and a table of
    SLA-violating days into ``daily_degraded_table``.
    """
    if current_analysis_df is None or current_analysis_df.empty:
        daily_avail_plot_pane.object = None
        daily_degraded_table.value = pd.DataFrame()
        return

    temp_df = current_analysis_df.copy()
    # Bail out early when none of the per-RAT availability columns exist.
    if not any(
        col in temp_df.columns
        for col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]
    ):
        daily_avail_plot_pane.object = None
        daily_degraded_table.value = pd.DataFrame()
        return

    # Group by calendar day (drop the time component of the timestamp).
    temp_df["date_only"] = temp_df["date"].dt.date

    # Build the aggregation spec only from the columns that are present.
    agg_dict: dict[str, str] = {}
    if "2g_tch_avail" in temp_df.columns:
        agg_dict["2g_tch_avail"] = "mean"
    if "3g_cell_avail" in temp_df.columns:
        agg_dict["3g_cell_avail"] = "mean"
    if "lte_cell_avail" in temp_df.columns:
        agg_dict["lte_cell_avail"] = "mean"

    daily_avail = (
        temp_df.groupby("date_only", as_index=False).agg(agg_dict)
        if agg_dict
        else pd.DataFrame()
    )

    if daily_avail.empty:
        daily_avail_plot_pane.object = None
        daily_degraded_table.value = pd.DataFrame()
        return

    # Rename availability columns to short RAT labels for the plot legend.
    rename_map: dict[str, str] = {}
    if "2g_tch_avail" in daily_avail.columns:
        rename_map["2g_tch_avail"] = "2G"
    if "3g_cell_avail" in daily_avail.columns:
        rename_map["3g_cell_avail"] = "3G"
    if "lte_cell_avail" in daily_avail.columns:
        rename_map["lte_cell_avail"] = "LTE"

    daily_avail = daily_avail.rename(columns=rename_map)

    value_cols = [c for c in daily_avail.columns if c != "date_only"]
    if not value_cols:
        daily_avail_plot_pane.object = None
        daily_degraded_table.value = pd.DataFrame()
        return

    # Long format: one row per (day, RAT) so px.line draws one line per RAT.
    daily_melt = daily_avail.melt(
        id_vars="date_only",
        value_vars=value_cols,
        var_name="RAT",
        value_name="availability",
    )

    fig = px.line(
        daily_melt,
        x="date_only",
        y="availability",
        color="RAT",
        markers=True,
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_layout(
        template="plotly_white",
        plot_bgcolor="white",
        paper_bgcolor="white",
    )
    daily_avail_plot_pane.object = fig

    # Collect the days where a RAT's network-wide daily mean drops below its SLA.
    degraded_rows: list[dict] = []
    for rat_name, sla_value in [
        ("2G", float(sla_2g.value)),
        ("3G", float(sla_3g.value)),
        ("LTE", float(sla_lte.value)),
    ]:
        if rat_name in daily_avail.columns:
            series = daily_avail[rat_name]
            mask = series < sla_value
            for d, val in zip(daily_avail.loc[mask, "date_only"], series[mask]):
                degraded_rows.append(
                    {
                        "RAT": rat_name,
                        "date": d,
                        "avg_availability": val,
                        "SLA": sla_value,
                    }
                )

    if degraded_rows:
        degraded_df = pd.DataFrame(degraded_rows)
        daily_degraded_table.value = degraded_df.round(2)
    else:
        daily_degraded_table.value = pd.DataFrame()
| 1851 |
+
|
| 1852 |
+
|
| 1853 |
+
# Shared blue-to-red continuous colorscale used by both traffic maps
# (previously duplicated inline for the data and voice maps).
_TRAFFIC_MAP_COLORSCALE = [
    [0.0, "#4292c6"],
    [0.2, "#2171b5"],
    [0.4, "#084594"],
    [0.6, "#cb181d"],
    [0.8, "#a50f15"],
    [1.0, "#67000d"],
]


def _make_top_sites_fig(top_series, value_col: str, title: str):
    """Horizontal bar chart of the top sites, labelled ``City_code``."""
    plot_df = top_series.reset_index()
    fig = px.bar(
        plot_df,
        # Compose a readable y label from City and site code.
        y=plot_df[["City", "code"]].agg(lambda x: "_".join(map(str, x)), axis=1),
        x=value_col,
        title=title,
        orientation="h",
        text=value_col,
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_layout(
        template="plotly_white",
        plot_bgcolor="white",
        paper_bgcolor="white",
    )
    return fig


def _make_traffic_map_fig(map_df, value_col: str, title: str, colorbar_title: str):
    """Bubble map of per-site traffic; bubble size is min-max scaled to [5, 40]."""
    min_size = 5
    max_size = 40
    traffic_min = map_df[value_col].min()
    traffic_max = map_df[value_col].max()
    if traffic_max > traffic_min:
        # Linear min-max scaling of bubble sizes between min_size and max_size.
        map_df["bubble_size"] = map_df[value_col].apply(
            lambda x: min_size
            + (max_size - min_size) * (x - traffic_min) / (traffic_max - traffic_min)
        )
    else:
        # All sites carry the same traffic: constant bubble size.
        map_df["bubble_size"] = min_size

    fig = px.scatter_map(
        map_df,
        lat="Latitude",
        lon="Longitude",
        color=value_col,
        size="bubble_size",
        color_continuous_scale=_TRAFFIC_MAP_COLORSCALE,
        size_max=max_size,
        zoom=10,
        height=600,
        title=title,
        hover_data={"code": True, value_col: True},
        hover_name="code",
        text=[str(x) for x in map_df["code"]],
    )
    fig.update_layout(
        # Bug fix: px.scatter_map renders on layout.map (MapLibre-based), so the
        # basemap style must be set via map_style; the original used
        # mapbox_style, which only affects layout.mapbox and had no effect here.
        map_style="open-street-map",
        coloraxis_colorbar=dict(title=colorbar_title),
        coloraxis=dict(cmin=traffic_min, cmax=traffic_max),
        font=dict(size=10, color="black"),
    )
    return fig


def _update_top_sites_and_maps() -> None:
    """Top traffic sites (tables + bar charts) and geographic maps for the last period.

    Reads ``current_analysis_last_period_df`` and the ``number_of_top_trafic_sites``
    widget; writes into the top-site tables, the bar-chart panes and the map panes.
    """
    if current_analysis_last_period_df is None or current_analysis_last_period_df.empty:
        top_data_sites_table.value = pd.DataFrame()
        top_voice_sites_table.value = pd.DataFrame()
        top_data_bar_plot_pane.object = None
        top_voice_bar_plot_pane.object = None
        data_map_plot_pane.object = None
        voice_map_plot_pane.object = None
        return

    df = current_analysis_last_period_df
    n = int(number_of_top_trafic_sites.value or 25)

    # Top sites by data traffic.
    top_sites = (
        df.groupby(["code", "City"])["total_data_trafic"]
        .sum()
        .sort_values(ascending=False)
        .head(n)
    )
    # Table is shown ascending so the biggest site ends up at the bottom,
    # matching the horizontal bar chart ordering.
    top_data_sites_table.value = top_sites.sort_values(ascending=True).reset_index()
    top_data_bar_plot_pane.object = _make_top_sites_fig(
        top_sites, "total_data_trafic", f"Top {n} sites by data traffic"
    )

    # Top sites by voice traffic.
    top_sites_voice = (
        df.groupby(["code", "City"])["total_voice_trafic"]
        .sum()
        .sort_values(ascending=False)
        .head(n)
    )
    top_voice_sites_table.value = top_sites_voice.sort_values(
        ascending=True
    ).reset_index()
    top_voice_bar_plot_pane.object = _make_top_sites_fig(
        top_sites_voice, "total_voice_trafic", f"Top {n} sites by voice traffic"
    )

    # Maps require site coordinates.
    if not {"Latitude", "Longitude"}.issubset(df.columns):
        data_map_plot_pane.object = None
        voice_map_plot_pane.object = None
        return

    # Data traffic map.
    df_data = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
        .sum()
        .reset_index()
    )
    data_map_plot_pane.object = (
        _make_traffic_map_fig(
            df_data,
            "total_data_trafic",
            "Data traffic distribution",
            "Total Data Traffic (MB)",
        )
        if not df_data.empty
        else None
    )

    # Voice traffic map.
    df_voice = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
        .sum()
        .reset_index()
    )
    voice_map_plot_pane.object = (
        _make_traffic_map_fig(
            df_voice,
            "total_voice_trafic",
            "Voice traffic distribution",
            "Total Voice Traffic (MB)",
        )
        if not df_voice.empty
        else None
    )
| 2036 |
+
|
| 2037 |
+
|
| 2038 |
+
def _update_persistent_table_view(event=None) -> None:  # noqa: D401, ARG001
    """Refresh the persistent-issues table from ``current_persistent_df``.

    Shows the first ``top_critical_n_widget.value`` rows (default 25), rounded
    to 2 decimals; an empty table when no persistent issues are available.
    """
    df = current_persistent_df
    if df is None or df.empty:
        persistent_table.value = pd.DataFrame()
    else:
        top_n = int(top_critical_n_widget.value or 25)
        persistent_table.value = df.head(top_n).round(2)
| 2046 |
+
|
| 2047 |
+
|
| 2048 |
+
def _recompute_persistent_from_widget(event=None) -> None:  # noqa: ARG001
    """Recompute persistent issues when the minimum consecutive days widget changes.

    Updates the module-level ``current_persistent_df`` and refreshes the table.
    """
    global current_persistent_df

    inputs_missing = (
        current_analysis_df is None
        or current_analysis_df.empty
        or current_multi_rat_df is None
        or current_multi_rat_df.empty
    )
    if inputs_missing:
        current_persistent_df = None
        persistent_table.value = pd.DataFrame()
        return

    result = analyze_persistent_availability(
        current_analysis_df,
        current_multi_rat_df,
        float(sla_2g.value),
        float(sla_3g.value),
        float(sla_lte.value),
        int(min_persistent_days_widget.value),
    )

    # Normalize "no issues" to None so downstream guards stay simple.
    current_persistent_df = result if result is not None and not result.empty else None
    _update_persistent_table_view()
| 2075 |
+
|
| 2076 |
+
|
| 2077 |
+
def _build_input_parameters_df() -> pd.DataFrame:
    """Build a two-column (Parameter / Value) DataFrame of the report inputs."""
    rows: list[dict] = []

    def _add(name: str, value) -> None:
        rows.append({"Parameter": name, "Value": value})

    # Input files are only listed when actually uploaded.
    if file_2g.filename:
        _add("2G Report File", file_2g.filename)
    if file_3g.filename:
        _add("3G Report File", file_3g.filename)
    if file_lte.filename:
        _add("LTE Report File", file_lte.filename)

    # Each period widget holds a (start, end) pair when configured.
    for widget, label in (
        (pre_range, "Pre-Period"),
        (post_range, "Post-Period"),
        (last_range, "Last Period"),
    ):
        if widget.value and len(widget.value) == 2:
            _add(f"{label} Start", widget.value[0])
            _add(f"{label} End", widget.value[1])

    _add("2G TCH Availability SLA (%)", sla_2g.value)
    _add("3G Cell Availability SLA (%)", sla_3g.value)
    _add("LTE Cell Availability SLA (%)", sla_lte.value)
    _add("Number of Top Traffic Sites", number_of_top_trafic_sites.value)
    _add("Number of Top Critical Sites", top_critical_n_widget.value)
    _add("Minimum Consecutive Days Below SLA", min_persistent_days_widget.value)
    _add("Export Timestamp", datetime.now())
    return pd.DataFrame(rows)
| 2120 |
+
|
| 2121 |
+
|
| 2122 |
+
def _build_export_bytes() -> bytes:
    """Build Excel report bytes mirroring the Streamlit export structure.

    Returns ``b""`` when no analysis has been run yet (``current_full_df`` is None).
    """
    if current_full_df is None:
        return b""

    def _or_empty(df):
        # Missing optional sections are exported as empty sheets.
        return df if df is not None else pd.DataFrame()

    # Sheet name and content are kept side by side so they cannot drift apart.
    sheets = [
        ("Input_Parameters", _build_input_parameters_df()),
        ("Global_Trafic_Analysis", current_full_df),
        ("Sum_pre_post_analysis", _or_empty(current_sum_pre_post_df)),
        ("Avg_pre_post_analysis", _or_empty(current_avg_pre_post_df)),
        ("Monthly_voice_analysis", _or_empty(current_monthly_voice_df)),
        ("Monthly_data_analysis", _or_empty(current_monthly_data_df)),
        ("Availability_Summary_All_RAT", _or_empty(current_availability_summary_all_df)),
        ("TwoG_Availability_By_Site", _or_empty(current_site_2g_avail)),
        ("ThreeG_Availability_By_Site", _or_empty(current_site_3g_avail)),
        ("LTE_Availability_By_Site", _or_empty(current_site_lte_avail)),
        ("MultiRAT_Availability_By_Site", _or_empty(current_export_multi_rat_df)),
        ("Top_Critical_Sites", _or_empty(current_export_persistent_df)),
    ]

    dfs = [df for _, df in sheets]
    sheet_names = [name for name, _ in sheets]
    return write_dfs_to_excel(dfs, sheet_names, index=True)
| 2190 |
+
|
| 2191 |
+
|
| 2192 |
+
def _export_callback() -> io.BytesIO:
    """Return the cached Excel report as a file-like object for FileDownload.

    Uses bytes cached by the last completed analysis so the download is instant.
    Returns an empty buffer when no report has been generated yet.
    """
    # Bug fix: the return annotation said ``bytes`` but the function always
    # returns io.BytesIO (FileDownload expects a path or a file-like object,
    # not raw bytes).
    data = current_export_bytes or b""
    if not data:
        return io.BytesIO()
    return io.BytesIO(data)
| 2199 |
+
|
| 2200 |
+
|
| 2201 |
+
def _df_to_csv_bytes(df: pd.DataFrame | None) -> io.BytesIO:
|
| 2202 |
+
if df is None or getattr(df, "empty", True): # handles None and empty DataFrame
|
| 2203 |
+
return io.BytesIO()
|
| 2204 |
+
return io.BytesIO(df.to_csv(index=False).encode("utf-8"))
|
| 2205 |
+
|
| 2206 |
+
|
| 2207 |
+
def _download_multi_rat_table() -> io.BytesIO:
    """CSV download payload for the multi-RAT availability table."""
    df = getattr(multi_rat_table, "value", None)
    if not isinstance(df, pd.DataFrame):
        df = None
    return _df_to_csv_bytes(df)
| 2210 |
+
|
| 2211 |
+
|
| 2212 |
+
def _download_persistent_table() -> io.BytesIO:
    """CSV download payload for the persistent-issues table."""
    df = getattr(persistent_table, "value", None)
    if not isinstance(df, pd.DataFrame):
        df = None
    return _df_to_csv_bytes(df)
| 2215 |
+
|
| 2216 |
+
|
| 2217 |
+
def _download_top_data_sites() -> io.BytesIO:
    """CSV download payload for the top data-traffic sites table."""
    df = getattr(top_data_sites_table, "value", None)
    if not isinstance(df, pd.DataFrame):
        df = None
    return _df_to_csv_bytes(df)
| 2220 |
+
|
| 2221 |
+
|
| 2222 |
+
def _download_top_voice_sites() -> io.BytesIO:
    """CSV download payload for the top voice-traffic sites table."""
    df = getattr(top_voice_sites_table, "value", None)
    if not isinstance(df, pd.DataFrame):
        df = None
    return _df_to_csv_bytes(df)
| 2225 |
+
|
| 2226 |
+
|
| 2227 |
+
# Client-side fullscreen JS logic.
# Each plot pane is wrapped in an element carrying a dedicated CSS class; the
# findDeep helper below also descends into shadow roots because Panel/Bokeh
# render components inside Shadow DOM, where document.querySelector cannot see.
_JS_FULLSCREEN = """
function findDeep(root, cls) {
    if (!root) return null;
    if (root.classList && root.classList.contains(cls)) return root;

    if (root.shadowRoot) {
        var found = findDeep(root.shadowRoot, cls);
        if (found) return found;
    }

    var children = root.children;
    if (children) {
        for (var i = 0; i < children.length; i++) {
            var found = findDeep(children[i], cls);
            if (found) return found;
        }
    }
    return null;
}

var el = findDeep(document.body, target_class);

if (el) {
    if (el.requestFullscreen) {
        el.requestFullscreen();
    } else if (el.webkitRequestFullscreen) {
        el.webkitRequestFullscreen();
    } else if (el.msRequestFullscreen) {
        el.msRequestFullscreen();
    }
} else {
    // Debug info
    alert("Impossible de passer en plein écran : élément '" + target_class + "' introuvable même après recherche approfondie (Shadow DOM).");
}
"""


# Reactive bindings for drill-down controls & export: widget changes re-render
# only the matching view, without re-running the whole analysis.
site_select.param.watch(_update_site_view, "value")
city_select.param.watch(_update_city_view, "value")
top_critical_n_widget.param.watch(_update_persistent_table_view, "value")
number_of_top_trafic_sites.param.watch(_update_top_sites_and_maps, "value")
min_persistent_days_widget.param.watch(_recompute_persistent_from_widget, "value")

# FileDownload callbacks: each returns an in-memory file-like object.
export_button.callback = _export_callback
multi_rat_download.callback = _download_multi_rat_table
persistent_download.callback = _download_persistent_table
top_data_download.callback = _download_top_data_sites
top_voice_download.callback = _download_top_voice_sites

# Wire each fullscreen button to the client-side JS, passing the CSS class of
# the wrapper element to expand as `target_class`.
site_traffic_fullscreen_btn.js_on_click(
    args={"target_class": "site-traffic-wrapper"},
    code=_JS_FULLSCREEN,
)
site_avail_fullscreen_btn.js_on_click(
    args={"target_class": "site-avail-wrapper"},
    code=_JS_FULLSCREEN,
)
city_traffic_fullscreen_btn.js_on_click(
    args={"target_class": "city-traffic-wrapper"},
    code=_JS_FULLSCREEN,
)
city_avail_fullscreen_btn.js_on_click(
    args={"target_class": "city-avail-wrapper"},
    code=_JS_FULLSCREEN,
)
daily_avail_fullscreen_btn.js_on_click(
    args={"target_class": "daily-avail-wrapper"},
    code=_JS_FULLSCREEN,
)
top_data_fullscreen_btn.js_on_click(
    args={"target_class": "top-data-bar-wrapper"},
    code=_JS_FULLSCREEN,
)
top_voice_fullscreen_btn.js_on_click(
    args={
        "target_class": "top-voice-bar-wrapper",
    },
    code=_JS_FULLSCREEN,
)
data_map_fullscreen_btn.js_on_click(
    args={"target_class": "data-map-wrapper"},
    code=_JS_FULLSCREEN,
)
voice_map_fullscreen_btn.js_on_click(
    args={"target_class": "voice-map-wrapper"},
    code=_JS_FULLSCREEN,
)
| 2318 |
+
|
| 2319 |
+
|
| 2320 |
+
# --------------------------------------------------------------------------------------
# Material Template layout
# --------------------------------------------------------------------------------------


template = pn.template.MaterialTemplate(
    title="📊 Global Trafic Analysis - Panel (2G / 3G / LTE)",
)

# Ensure the template modal is large enough for fullscreen charts.
# Modal CSS override removed as we switched to native fullscreen.

# Sidebar: upload widgets, period pickers, SLA inputs and analysis controls.
sidebar_content = pn.Column(
    """This Panel app is a migration of the existing Streamlit-based global traffic analysis.

Upload the 3 traffic reports (2G / 3G / LTE), configure the analysis periods and SLAs, then run the analysis.

In this first step, the app only validates the pipeline and shows a lightweight summary of the inputs.\nFull KPIs and visualizations will be added progressively.""",
    "---",
    file_2g,
    file_3g,
    file_lte,
    "---",
    pre_range,
    post_range,
    last_range,
    "---",
    sla_2g,
    sla_3g,
    sla_lte,
    "---",
    number_of_top_trafic_sites,
    min_persistent_days_widget,
    top_critical_n_widget,
    "---",
    run_button,
)

# Main area: results sections separated by dividers, in reading order:
# summary, pre/post comparison, availability, drill-downs, maps, export.
main_content = pn.Column(
    status_pane,
    pn.pane.Markdown("## Input datasets summary"),
    summary_table,
    pn.layout.Divider(),
    pn.pane.Markdown("## Summary Analysis Pre / Post"),
    sum_pre_post_table,
    pn.layout.Divider(),
    pn.pane.Markdown("## Availability vs SLA (per RAT)"),
    # One tab per RAT: summary table plus the 25 worst sites.
    pn.Tabs(
        (
            "2G",
            pn.Column(
                summary_2g_table, pn.pane.Markdown("Worst 25 sites"), worst_2g_table
            ),
        ),
        (
            "3G",
            pn.Column(
                summary_3g_table, pn.pane.Markdown("Worst 25 sites"), worst_3g_table
            ),
        ),
        (
            "LTE",
            pn.Column(
                summary_lte_table, pn.pane.Markdown("Worst 25 sites"), worst_lte_table
            ),
        ),
    ),
    pn.layout.Divider(),
    pn.pane.Markdown("## Multi-RAT Availability (post-period)"),
    multi_rat_table,
    multi_rat_download,
    pn.layout.Divider(),
    pn.pane.Markdown("## Persistent availability issues (critical sites)"),
    persistent_table,
    persistent_download,
    pn.layout.Divider(),
    pn.pane.Markdown("## Site drill-down: traffic and availability over time"),
    site_select,
    site_traffic_plot,
    site_traffic_fullscreen_btn,
    site_avail_plot,
    site_avail_fullscreen_btn,
    site_degraded_table,
    pn.layout.Divider(),
    pn.pane.Markdown("## City drill-down: traffic and availability over time"),
    city_select,
    city_traffic_plot,
    city_traffic_fullscreen_btn,
    city_avail_plot,
    city_avail_fullscreen_btn,
    city_degraded_table,
    pn.layout.Divider(),
    pn.pane.Markdown("## Daily average availability per RAT"),
    daily_avail_plot,
    daily_avail_fullscreen_btn,
    daily_degraded_table,
    pn.layout.Divider(),
    pn.pane.Markdown("## Top traffic sites and geographic maps (last period)"),
    # Side-by-side data vs voice top-site tables and bar charts.
    pn.Row(
        pn.Column(
            pn.pane.Markdown("### Top sites by data traffic"),
            top_data_sites_table,
            top_data_download,
            top_data_bar_plot,
            top_data_fullscreen_btn,
        ),
        pn.Column(
            pn.pane.Markdown("### Top sites by voice traffic"),
            top_voice_sites_table,
            top_voice_download,
            top_voice_bar_plot,
            top_voice_fullscreen_btn,
        ),
    ),
    # Side-by-side data vs voice geographic maps.
    pn.Row(
        pn.Column(
            pn.pane.Markdown("### Data traffic map"),
            data_map_plot,
            data_map_fullscreen_btn,
        ),
        pn.Column(
            pn.pane.Markdown("### Voice traffic map"),
            voice_map_plot,
            voice_map_fullscreen_btn,
        ),
    ),
    pn.layout.Divider(),
    pn.pane.Markdown("## Export"),
    export_button,
)
| 2450 |
+
|
| 2451 |
+
|
| 2452 |
+
def get_page_components():
    """Return the (sidebar, main) Panel components so a portal can embed this page."""
    return sidebar_content, main_content
|
| 2454 |
+
|
| 2455 |
+
|
| 2456 |
+
if __name__ == "__main__":
    # Standalone mode: mount this page's sidebar/main panes into the
    # module-level template and mark it servable for `panel serve`.
    template.sidebar.append(sidebar_content)
    template.main.append(main_content)
    template.servable()
|
physical_db/physical_database.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
process_kpi/__init__.py
ADDED
|
File without changes
|
process_kpi/gsm_kpi_requirements.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Required Input
|
| 2 |
+
|
| 3 |
+
- BH report
|
| 4 |
+
- Daily Report
|
| 5 |
+
- Dump file (2G dump)
|
| 6 |
+
- Number of last day for the analysis
|
| 7 |
+
- Number of days for blocking
|
| 8 |
+
- Sddch blocking threshold
|
| 9 |
+
- TCH blocking threshold
|
| 10 |
+
- Availability threshold
|
| 11 |
+
- TCH abis fails threshold
|
| 12 |
+
|
| 13 |
+
Analyse
|
| 14 |
+
|
| 15 |
+
DUMP
|
| 16 |
+
|
| 17 |
+
- Check that mandatory sheet exists in the dump
|
| 18 |
+
- Parse 2G databases
|
| 19 |
+
- Get number of TRX,TCH,SDCCH,amrSegLoadDepTchRateLower,amrSegLoadDepTchRateUpper from databases
|
| 20 |
+
- Add "GPRS" column equal to (dedicatedGPRScapacity * number_tch_per_cell)/100
|
| 21 |
+
- Get "Coef HF rate" by mapping "amrSegLoadDepTchRateLower" to 2G analysis_utility "hf_rate_coef" dict
|
| 22 |
+
- "TCH Actual HR%" equal to "number of TCH" multiplied by "Coef HF rate"
|
| 23 |
+
- Get "Offered Traffic" by mapping approximate "TCH Actual HR%" to 2G analysis_utility "erlangB" dict
|
| 24 |
+
|
| 25 |
+
BH DATA
|
| 26 |
+
|
| 27 |
+
- Pivot KPI in BH report
|
| 28 |
+
- Calculate Average and Max of Traffic
|
| 29 |
+
- Average of TCH blocking
|
| 30 |
+
- Average of SDCCH blocking
|
| 31 |
+
- Count number of Days with TCH blocking exceeded TCH blocking threshold
|
| 32 |
+
- Count number of Days with SDCCH blocking exceeded Sddch blocking threshold
|
| 33 |
+
- Count number of Days with Availability below Availability threshold
|
| 34 |
+
- "TCH UTILIZATION (@Max Traffic)" equal to "Max_Trafic" divided by "offered Traffic"
|
| 35 |
+
- Add "ErlabngB_value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
|
| 36 |
+
- Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
|
| 37 |
+
- "Target HR CHs" equal to "Target FR CHs" * 2
|
| 38 |
+
- Get "Signal" and "GPRS" value from databases
|
| 39 |
+
- Target TCHs equal to Target HR CHs + Signal + GPRS + SDCCH
|
| 40 |
+
- "Target TRXs" equal to roundup(Target TCHs/8)
|
| 41 |
+
- "# of required TRXs" equal to difference between "Target TRXs" and "number of TRX"
|
| 42 |
+
|
| 43 |
+
Daily DATA
|
| 44 |
+
|
| 45 |
+
- Pivot KPI in Daily Report
|
| 46 |
+
- Count number of Days with Availability below Availability threshold
|
| 47 |
+
- Count number of Days with abis fails exceeded TCH abis fails threshold
|
process_kpi/kpi_health_check/__init__.py
ADDED
|
File without changes
|
process_kpi/kpi_health_check/benchmarks.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
def calculate_sla_metrics(
    df: pd.DataFrame,
    kpi: str,
    rules_df: pd.DataFrame | None = None
) -> dict:
    """Compute lightweight benchmark metrics for a single KPI trace.

    Looks up the KPI's SLA in ``rules_df`` (matching on the ``KPI`` column)
    and computes the median of the KPI values in ``df``.

    Returns a dict ``{"sla": float | None, "median": float | None}``;
    either entry is None when it cannot be determined.
    """
    metrics = {"sla": None, "median": None}

    # Nothing to measure without the KPI column present.
    if df is None or df.empty or kpi not in df.columns:
        return metrics

    # SLA lookup: simplistic match on KPI name only (RAT scoping, if any,
    # is expected to happen before this call).
    if rules_df is not None and not rules_df.empty:
        try:
            matching = rules_df.loc[rules_df["KPI"] == kpi]
            if len(matching):
                raw = matching.iloc[0].get("sla")
                if pd.notna(raw):
                    metrics["sla"] = float(raw)
        except Exception:
            pass

    # Median over the whole frame passed in (typically the recent window).
    try:
        series = pd.to_numeric(df[kpi], errors="coerce").dropna()
        if len(series):
            metrics["median"] = float(series.median())
    except Exception:
        pass

    return metrics
|
process_kpi/kpi_health_check/engine.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import date, datetime, timedelta
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _to_timestamp(value) -> pd.Timestamp | None:
|
| 8 |
+
if value is None:
|
| 9 |
+
return None
|
| 10 |
+
if isinstance(value, pd.Timestamp):
|
| 11 |
+
return value
|
| 12 |
+
if isinstance(value, datetime):
|
| 13 |
+
return pd.Timestamp(value)
|
| 14 |
+
if isinstance(value, date):
|
| 15 |
+
return pd.Timestamp(value)
|
| 16 |
+
try:
|
| 17 |
+
v = pd.to_datetime(value, errors="coerce")
|
| 18 |
+
return v if pd.notna(v) else None
|
| 19 |
+
except Exception: # noqa: BLE001
|
| 20 |
+
return None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def window_bounds_period(
    end_dt: pd.Timestamp,
    periods: int,
    step: timedelta,
) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Return (start, end) of a window of `periods` steps ending at `end_dt` inclusive."""
    span = step * (int(periods) - 1)
    return end_dt - span, end_dt
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def window_bounds(end_date: date, days: int) -> tuple[date, date]:
    """Return (start, end) of a `days`-long daily window ending at `end_date` inclusive."""
    return end_date - timedelta(days=days - 1), end_date
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def is_bad(
    value: float | None,
    baseline: float | None,
    direction: str,
    rel_threshold_pct: float,
    sla: float | None,
) -> bool:
    """Scalar breach test.

    A value is "bad" when it violates the SLA (if one is given) or when it
    deviates from `baseline` by more than `rel_threshold_pct` percent in the
    unfavourable direction. Missing/NaN values are never bad; a missing
    baseline limits the check to the SLA alone.
    """
    def _missing(x) -> bool:
        return x is None or (isinstance(x, float) and np.isnan(x))

    if _missing(value):
        return False

    higher_better = direction == "higher_is_better"

    breached = False
    if not _missing(sla):
        breached = value < float(sla) if higher_better else value > float(sla)

    if _missing(baseline):
        return breached

    # Relative margin is symmetric around the baseline magnitude.
    margin = abs(baseline) * (float(rel_threshold_pct) / 100.0)
    if higher_better:
        return breached or value < baseline - margin
    return breached or value > baseline + margin
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def max_consecutive_periods(values: list, step: timedelta) -> int:
    """Length of the longest run of timestamps spaced exactly `step` apart.

    Values are coerced via `_to_timestamp`; unparseable entries are dropped
    and duplicates collapse to a single point. Returns 0 when nothing parses.
    """
    if not values:
        return 0
    stamps = sorted({t for t in (_to_timestamp(v) for v in values) if t is not None})
    if not stamps:
        return 0
    best = cur = 1
    for prev, nxt in zip(stamps, stamps[1:]):
        cur = cur + 1 if nxt == prev + step else 1
        best = max(best, cur)
    return best
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def max_consecutive_days(dates: list[date]) -> int:
    """Longest run of consecutive calendar days in `dates` (one-day step)."""
    return max_consecutive_periods(dates, step=timedelta(days=1))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def evaluate_health_check(
    daily: pd.DataFrame,
    rat: str,
    rules_df: pd.DataFrame,
    baseline_days_n: int,
    recent_days_n: int,
    rel_threshold_pct: float,
    min_consecutive_days: int,
    granularity: str = "Daily",
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Evaluate per-site KPI health for one RAT.

    The timeline is split into a baseline window (ending one step before the
    recent window) and a recent window (ending at the newest sample). For each
    rule KPI present as a column of ``daily``, the recent median is compared
    against the baseline median (relative threshold) and an optional SLA, and
    each (site, KPI) pair is classified.

    Parameters
    ----------
    daily : frame with ``site_code``, a time column (``period_start`` when
        hourly, else ``date_only``), optional ``City`` and one column per KPI.
    rat : RAT label used to select rows of ``rules_df``.
    rules_df : rule table with ``RAT``, ``KPI``, ``direction``, ``policy``
        and ``sla`` columns.
    baseline_days_n, recent_days_n : window lengths in days (scaled x24 for
        hourly granularity).
    rel_threshold_pct : allowed relative deviation from the baseline median.
    min_consecutive_days : bad-streak length that upgrades DEGRADED to
        PERSISTENT_DEGRADED (also scaled x24 when hourly).
    granularity : "Daily" or "Hourly".

    Returns
    -------
    (status_df, summary_df) : per-(site, KPI) status rows and a per-site
    summary sorted by degraded-KPI count; both empty when nothing is
    evaluable.
    """
    if daily.empty:
        return pd.DataFrame(), pd.DataFrame()

    g = str(granularity or "Daily").strip().lower()
    is_hourly = g.startswith("hour") or g.startswith("h")
    time_col = (
        "period_start"
        if (is_hourly and "period_start" in daily.columns)
        else "date_only"
    )

    # Hourly mode keeps the day-based knobs but scales them to hour periods.
    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
    min_periods = (
        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
    )

    end_dt = _to_timestamp(pd.to_datetime(daily[time_col], errors="coerce").max())
    if end_dt is None:
        return pd.DataFrame(), pd.DataFrame()

    # Recent window ends at the newest sample; baseline ends one step earlier.
    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
    baseline_end_dt = recent_start_dt - step
    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)

    rat_rules = rules_df[rules_df["RAT"] == rat].copy()
    kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
    rules_by_kpi = {
        str(r["KPI"]): r
        for r in rat_rules.to_dict(orient="records")
        if str(r.get("KPI", ""))
    }

    rows = []

    for site_code, g_site in daily.groupby("site_code"):
        city = (
            g_site["City"].dropna().iloc[0]
            if ("City" in g_site.columns and g_site["City"].notna().any())
            else None
        )
        g_site = g_site.sort_values(time_col)
        t_all = pd.to_datetime(g_site[time_col], errors="coerce")
        baseline_mask_all = (t_all >= baseline_start_dt) & (t_all <= baseline_end_dt)
        recent_mask_all = (t_all >= recent_start_dt) & (t_all <= recent_end_dt)

        for kpi in kpis:
            rule = rules_by_kpi.get(str(kpi), {})
            direction = str(rule.get("direction", "higher_is_better"))
            policy = str(rule.get("policy", "enforce") or "enforce").strip().lower()
            sla = rule.get("sla", np.nan)
            try:
                sla_val = float(sla) if pd.notna(sla) else None
            except Exception:
                sla_val = None

            # "notify" rules are informational: the SLA is reported but never
            # used to flag a breach.
            sla_eval = None if policy == "notify" else sla_val

            vals = pd.to_numeric(g_site[kpi], errors="coerce")
            has_any = bool(vals.notna().any())
            if not has_any:
                rows.append(
                    {
                        "RAT": rat,
                        "site_code": int(site_code),
                        "City": city,
                        "KPI": kpi,
                        "status": "NO_DATA",
                    }
                )
                continue

            baseline_vals = vals.loc[baseline_mask_all]
            recent_vals = vals.loc[recent_mask_all]
            t_recent = t_all.loc[recent_vals.index]

            baseline = (
                baseline_vals.median(skipna=True) if baseline_mask_all.any() else np.nan
            )
            recent = (
                recent_vals.median(skipna=True) if recent_mask_all.any() else np.nan
            )

            # Collect the timestamps of individual bad samples in the recent
            # window (used for streak detection below).
            bad_dates: list = []
            if recent_mask_all.any() and recent_vals.notna().any():
                thr = float(rel_threshold_pct) / 100.0
                b = float(baseline) if pd.notna(baseline) else None
                bad_series = pd.Series(False, index=recent_vals.index)

                if b is not None:
                    if direction == "higher_is_better":
                        bad_series = bad_series | (recent_vals < (b - abs(b) * thr))
                    else:
                        bad_series = bad_series | (recent_vals > (b + abs(b) * thr))

                if sla_eval is not None and pd.notna(sla_eval):
                    if direction == "higher_is_better":
                        bad_series = bad_series | (recent_vals < float(sla_eval))
                    else:
                        bad_series = bad_series | (recent_vals > float(sla_eval))

                bad_series = bad_series & recent_vals.notna() & t_recent.notna()
                if bool(bad_series.any()):
                    bad_dates = t_recent.loc[bad_series].tolist()

            max_streak = max_consecutive_periods(bad_dates, step=step)
            persistent = max_streak >= int(min_periods)

            is_bad_recent = is_bad(
                float(recent) if pd.notna(recent) else None,
                float(baseline) if pd.notna(baseline) else None,
                direction,
                rel_threshold_pct,
                sla_eval,
            )

            # "Current" badness prefers the newest valid sample in the recent
            # window; fall back to the recent median when that lookup fails.
            is_bad_current = is_bad_recent
            try:
                last_mask = recent_mask_all & vals.notna() & t_all.notna()
                if bool(last_mask.any()):
                    idx_last = t_all.loc[last_mask].idxmax()
                    last_val = vals.loc[idx_last]
                    is_bad_current = is_bad(
                        float(last_val) if pd.notna(last_val) else None,
                        float(baseline) if pd.notna(baseline) else None,
                        direction,
                        rel_threshold_pct,
                        sla_eval,
                    )
            except Exception:  # noqa: BLE001
                pass

            had_bad_recent = (len(bad_dates) > 0) or bool(is_bad_recent)

            if policy == "notify":
                if is_bad_current:
                    status = "NOTIFY"
                elif had_bad_recent:
                    status = "NOTIFY_RESOLVED"
                else:
                    status = "OK"
            else:
                if is_bad_current and persistent:
                    status = "PERSISTENT_DEGRADED"
                elif is_bad_current:
                    status = "DEGRADED"
                elif had_bad_recent:
                    status = "RESOLVED"
                else:
                    status = "OK"

            rows.append(
                {
                    "RAT": rat,
                    "site_code": int(site_code),
                    "City": city,
                    "KPI": kpi,
                    "direction": direction,
                    "sla": sla_val,
                    "policy": policy,
                    "baseline_median": baseline,
                    "recent_median": recent,
                    "bad_days_recent": len(bad_dates),
                    "max_streak_recent": int(max_streak),
                    "status": status,
                }
            )

    status_df = pd.DataFrame(rows)

    # Fix: when no rule KPI matches a column of `daily` (or no site produced
    # any row), `rows` stays empty and `status_df` has no "site_code" column,
    # so the groupby below would raise KeyError. Return empty frames instead.
    if status_df.empty:
        return status_df, pd.DataFrame()

    summary_rows = []
    for site_code, g in status_df.groupby("site_code"):
        city = (
            g["City"].dropna().iloc[0]
            if ("City" in g.columns and g["City"].notna().any())
            else None
        )
        degraded_cnt = int(g["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum())
        persistent_cnt = int((g["status"] == "PERSISTENT_DEGRADED").sum())
        resolved_cnt = int((g["status"] == "RESOLVED").sum())
        summary_rows.append(
            {
                "RAT": rat,
                "site_code": int(site_code),
                "City": city,
                "degraded_kpis": degraded_cnt,
                "persistent_kpis": persistent_cnt,
                "resolved_kpis": resolved_cnt,
            }
        )

    summary_df = pd.DataFrame(summary_rows).sort_values(
        by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
        ascending=[False, False, False],
    )

    return status_df, summary_df
|
process_kpi/kpi_health_check/engine_v2.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import timedelta
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
from process_kpi.kpi_health_check.engine import window_bounds_period
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _to_datetime_series(s: pd.Series) -> pd.Series:
|
| 12 |
+
try:
|
| 13 |
+
return pd.to_datetime(s, errors="coerce")
|
| 14 |
+
except Exception:
|
| 15 |
+
return pd.to_datetime(pd.Series([], dtype="datetime64[ns]"), errors="coerce")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _vector_is_bad(
|
| 19 |
+
value: pd.Series,
|
| 20 |
+
baseline: pd.Series,
|
| 21 |
+
direction: str,
|
| 22 |
+
rel_threshold_pct: float,
|
| 23 |
+
sla: float | None,
|
| 24 |
+
) -> pd.Series:
|
| 25 |
+
v = pd.to_numeric(value, errors="coerce")
|
| 26 |
+
b = pd.to_numeric(baseline, errors="coerce")
|
| 27 |
+
|
| 28 |
+
bad = pd.Series(False, index=v.index)
|
| 29 |
+
|
| 30 |
+
if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
|
| 31 |
+
if str(direction) == "higher_is_better":
|
| 32 |
+
bad = bad | (v < float(sla))
|
| 33 |
+
else:
|
| 34 |
+
bad = bad | (v > float(sla))
|
| 35 |
+
|
| 36 |
+
thr = float(rel_threshold_pct) / 100.0
|
| 37 |
+
has_b = b.notna()
|
| 38 |
+
if bool(has_b.any()):
|
| 39 |
+
if str(direction) == "higher_is_better":
|
| 40 |
+
bad = bad | (v < (b - b.abs() * thr))
|
| 41 |
+
else:
|
| 42 |
+
bad = bad | (v > (b + b.abs() * thr))
|
| 43 |
+
|
| 44 |
+
bad = bad & v.notna()
|
| 45 |
+
return bad
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def evaluate_health_check(
    daily: pd.DataFrame,
    rat: str,
    rules_df: pd.DataFrame,
    baseline_days_n: int,
    recent_days_n: int,
    rel_threshold_pct: float,
    min_consecutive_days: int,
    granularity: str = "Daily",
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Vectorised health-check evaluation for one RAT.

    Functional equivalent of ``engine.evaluate_health_check`` but computed
    with groupby/vector operations instead of per-site Python loops: splits
    the timeline into a baseline window and a recent window, compares recent
    medians against baseline medians (relative threshold) and the rule SLA,
    and classifies every (site, KPI) pair.

    Returns ``(status_df, summary_df)``; both are empty when there is no
    data, no matching rules, or no rule KPI present as a column of ``daily``.
    """
    if daily is None or daily.empty:
        return pd.DataFrame(), pd.DataFrame()

    g = str(granularity or "Daily").strip().lower()
    is_hourly = g.startswith("hour") or g.startswith("h")
    # Hourly data keys on "period_start" (when present); daily on "date_only".
    time_col = (
        "period_start"
        if (is_hourly and "period_start" in daily.columns)
        else "date_only"
    )

    # Day-based knobs are scaled x24 when operating on hourly periods.
    step = timedelta(hours=1) if is_hourly else timedelta(days=1)
    baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
    recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
    min_periods = (
        int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
    )

    t_all = _to_datetime_series(daily[time_col])
    end_dt = t_all.max()
    if pd.isna(end_dt):
        return pd.DataFrame(), pd.DataFrame()

    end_dt = pd.Timestamp(end_dt)
    if is_hourly:
        # Align the window end to a whole hour so step arithmetic lines up.
        end_dt = end_dt.floor("h")

    # Recent window ends at the newest sample; baseline ends one step before it.
    recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
    baseline_end_dt = recent_start_dt - step
    baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)

    rat_rules = (
        rules_df[rules_df["RAT"] == rat].copy()
        if isinstance(rules_df, pd.DataFrame)
        else pd.DataFrame()
    )
    if rat_rules.empty or "KPI" not in rat_rules.columns:
        return pd.DataFrame(), pd.DataFrame()

    # Only evaluate rule KPIs that actually exist as columns of `daily`.
    kpi_cols = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
    if not kpi_cols:
        return pd.DataFrame(), pd.DataFrame()

    # Working frame: site, time, optional City, plus the KPI columns.
    base_cols = ["site_code", time_col]
    if "City" in daily.columns:
        base_cols.append("City")
    base = daily[base_cols + kpi_cols].copy()
    base["site_code"] = pd.to_numeric(base["site_code"], errors="coerce")
    base = base.dropna(subset=["site_code"]).copy()
    base["site_code"] = base["site_code"].astype(int)

    base_t = _to_datetime_series(base[time_col])
    base["_t"] = base_t
    base = base.dropna(subset=["_t"]).copy()

    baseline_mask = (base["_t"] >= pd.to_datetime(baseline_start_dt)) & (
        base["_t"] <= pd.to_datetime(baseline_end_dt)
    )
    recent_mask = (base["_t"] >= pd.to_datetime(recent_start_dt)) & (
        base["_t"] <= pd.to_datetime(recent_end_dt)
    )

    # Non-null sample counts per site/KPI; the site index drives all reindexing.
    counts = base.groupby("site_code")[kpi_cols].count()
    all_sites = counts.index

    # First non-null City per site (when the column exists).
    if "City" in base.columns:
        city_map = (
            base[["site_code", "City"]]
            .dropna(subset=["City"])
            .drop_duplicates("site_code")
            .set_index("site_code")["City"]
        )
        city = city_map.reindex(all_sites)
    else:
        city = pd.Series([None] * len(all_sites), index=all_sites)

    baseline_subset = base.loc[baseline_mask, ["site_code"] + kpi_cols]
    recent_subset = base.loc[recent_mask, ["site_code", "_t"] + kpi_cols]

    baseline_medians = (
        baseline_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
        if not baseline_subset.empty
        else pd.DataFrame(index=all_sites)
    )
    recent_medians = (
        recent_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
        if not recent_subset.empty
        else pd.DataFrame(index=all_sites)
    )

    # `gap_ok` marks recent rows that follow their predecessor by exactly one
    # step within the same site — used below to segment bad streaks.
    recent_sorted = (
        recent_subset.sort_values(["site_code", "_t"])
        if not recent_subset.empty
        else recent_subset
    )
    gap = recent_sorted.groupby("site_code")["_t"].diff()
    gap_ok = (gap == step).fillna(False)

    out_frames: list[pd.DataFrame] = []

    for _, rr in rat_rules.iterrows():
        kpi = str(rr.get("KPI"))
        if not kpi or kpi not in kpi_cols:
            continue

        direction = str(rr.get("direction", "higher_is_better"))
        policy = str(rr.get("policy", "enforce") or "enforce").strip().lower()
        sla_raw = rr.get("sla", np.nan)
        try:
            sla_val = float(sla_raw) if pd.notna(sla_raw) else None
        except Exception:
            sla_val = None

        # "notify" rules report the SLA but never use it to flag a breach.
        sla_eval = None if policy == "notify" else sla_val

        cnt = counts[kpi].reindex(all_sites).fillna(0).astype(int)
        has_any = cnt > 0

        baseline = (
            baseline_medians[kpi].reindex(all_sites)
            if kpi in baseline_medians.columns
            else pd.Series([np.nan] * len(all_sites), index=all_sites)
        )
        recent = (
            recent_medians[kpi].reindex(all_sites)
            if kpi in recent_medians.columns
            else pd.Series([np.nan] * len(all_sites), index=all_sites)
        )

        if not recent_sorted.empty and kpi in recent_sorted.columns:
            # Per-row badness in the recent window, against the site's
            # baseline median broadcast onto each row.
            v_recent = pd.to_numeric(recent_sorted[kpi], errors="coerce")
            b_row = recent_sorted["site_code"].map(
                pd.to_numeric(
                    baseline_medians.get(kpi, pd.Series(dtype=float)), errors="coerce"
                )
            )
            bad_row = _vector_is_bad(
                v_recent, b_row, direction, float(rel_threshold_pct), sla_eval
            )
            bad_row = bad_row & recent_sorted["_t"].notna()

            # Run segmentation: a new run starts at any time gap or any
            # non-bad row; cumsum of the start flags labels the runs.
            start = (~gap_ok) | (~bad_row) | gap_ok.isna()
            run_id = start.groupby(recent_sorted["site_code"]).cumsum()

            bad_counts = (
                bad_row.groupby(recent_sorted["site_code"])
                .sum()
                .reindex(all_sites)
                .fillna(0)
                .astype(int)
            )
            # Longest bad streak per site = max bad count within any run.
            streaks = (
                bad_row.groupby([recent_sorted["site_code"], run_id])
                .sum()
                .groupby(level=0)
                .max()
                .reindex(all_sites)
                .fillna(0)
                .astype(int)
            )

            # Latest non-null value per site in the recent window, used for
            # the "current" badness check.
            tmp_last = (
                recent_sorted[["site_code", "_t", kpi]]
                .dropna(subset=[kpi])
                .sort_values(["site_code", "_t"])
            )
            if not tmp_last.empty:
                last_vals = tmp_last.groupby("site_code")[kpi].tail(1)
                last_map = pd.Series(
                    last_vals.values,
                    index=tmp_last.groupby("site_code")
                    .tail(1)["site_code"]
                    .astype(int)
                    .values,
                )
                last = last_map.reindex(all_sites)
            else:
                last = pd.Series([np.nan] * len(all_sites), index=all_sites)
        else:
            bad_counts = pd.Series([0] * len(all_sites), index=all_sites)
            streaks = pd.Series([0] * len(all_sites), index=all_sites)
            last = pd.Series([np.nan] * len(all_sites), index=all_sites)

        is_bad_recent = _vector_is_bad(
            recent, baseline, direction, float(rel_threshold_pct), sla_eval
        )
        is_bad_current = _vector_is_bad(
            last, baseline, direction, float(rel_threshold_pct), sla_eval
        )
        had_bad_recent = (bad_counts > 0) | is_bad_recent

        persistent = streaks >= int(min_periods)

        # Status resolution: later .where() calls overwrite earlier ones, so
        # precedence is NO_DATA < NOTIFY/PERSISTENT/DEGRADED < *_RESOLVED.
        status = pd.Series("OK", index=all_sites)
        status = status.where(has_any, "NO_DATA")

        if policy == "notify":
            status = status.where(~has_any, "NO_DATA")
            status = status.where(~(has_any & is_bad_current), "NOTIFY")
            status = status.where(
                ~(has_any & (~is_bad_current) & had_bad_recent), "NOTIFY_RESOLVED"
            )
        else:
            status = status.where(
                ~(has_any & is_bad_current & persistent), "PERSISTENT_DEGRADED"
            )
            status = status.where(
                ~(has_any & is_bad_current & (~persistent)), "DEGRADED"
            )
            status = status.where(
                ~(has_any & (~is_bad_current) & had_bad_recent), "RESOLVED"
            )

        frame = pd.DataFrame(
            {
                "RAT": rat,
                "site_code": all_sites.astype(int),
                "City": city.values,
                "KPI": kpi,
                "direction": direction,
                "sla": sla_val,
                "policy": policy,
                "baseline_median": baseline.values,
                "recent_median": recent.values,
                "bad_days_recent": bad_counts.values,
                "max_streak_recent": streaks.values,
                "status": status.values,
            }
        )
        out_frames.append(frame)

    if not out_frames:
        return pd.DataFrame(), pd.DataFrame()

    # Filter out empty frames to avoid FutureWarning about empty/all-NA entries
    non_empty_frames = [f for f in out_frames if not f.empty and not f.isna().all().all()]
    if not non_empty_frames:
        return pd.DataFrame(), pd.DataFrame()

    status_df = pd.concat(non_empty_frames, ignore_index=True)

    # Per-site rollup of degraded / persistent / resolved KPI counts.
    summary = (
        status_df.groupby("site_code", as_index=False)
        .agg(
            RAT=("RAT", "first"),
            City=("City", "first"),
            degraded_kpis=(
                "status",
                lambda s: int(s.isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum()),
            ),
            persistent_kpis=(
                "status",
                lambda s: int((s == "PERSISTENT_DEGRADED").sum()),
            ),
            resolved_kpis=("status", lambda s: int((s == "RESOLVED").sum())),
        )
        .sort_values(
            by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
            ascending=[False, False, False],
        )
    )

    return status_df, summary
|
process_kpi/kpi_health_check/export.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from panel_app.convert_to_excel_panel import write_dfs_to_excel
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _normalize_time_key(
|
| 7 |
+
df: pd.DataFrame, granularity: str
|
| 8 |
+
) -> tuple[str, pd.Series] | None:
|
| 9 |
+
if df is None or df.empty:
|
| 10 |
+
return None
|
| 11 |
+
g = str(granularity or "Daily").strip().lower()
|
| 12 |
+
is_hourly = g.startswith("hour") or g.startswith("h")
|
| 13 |
+
if is_hourly:
|
| 14 |
+
time_col = "period_start" if "period_start" in df.columns else "date_only"
|
| 15 |
+
t = pd.to_datetime(df.get(time_col), errors="coerce").dt.floor("h")
|
| 16 |
+
return time_col, t
|
| 17 |
+
|
| 18 |
+
time_col = "date_only" if "date_only" in df.columns else "period_start"
|
| 19 |
+
t = pd.to_datetime(df.get(time_col), errors="coerce").dt.date
|
| 20 |
+
return time_col, t
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _build_all_tech_sheet(
    daily_by_rat: dict[str, pd.DataFrame],
    granularity: str,
) -> tuple[str, pd.DataFrame] | None:
    """Merge per-RAT daily/hourly frames into one wide "(Hourly|Daily)_All" sheet.

    Builds a base of unique (site_code, time) keys across all RAT frames,
    attaches site coordinates/City where available, then left-joins every
    RAT's KPI columns (renamed with a "<RAT>_" prefix) onto that base.

    Returns (sheet_name, dataframe), or None when there is nothing to merge.
    """
    if not daily_by_rat or not isinstance(daily_by_rat, dict):
        return None

    g = str(granularity or "Daily").strip().lower()
    prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"

    # Preferred RAT ordering for column layout; fall back to dict order for
    # any RATs outside the known set.
    ordered_rats = ["2G", "3G", "LTE", "TWAMP"]
    present = [r for r in ordered_rats if r in daily_by_rat]
    if not present:
        present = [str(r) for r in daily_by_rat.keys()]

    time_col = None  # time column name of the first usable RAT frame
    keys = []  # per-RAT (site_code, time) key frames
    coords_parts = []  # per-RAT site metadata (City/Longitude/Latitude)

    # Pass 1: collect the union of (site, time) keys and site metadata.
    for rat in present:
        df = daily_by_rat.get(rat)
        if not isinstance(df, pd.DataFrame) or df.empty:
            continue

        nt = _normalize_time_key(df, granularity)
        if nt is None:
            continue
        tc, tkey = nt
        if time_col is None:
            time_col = tc

        tmp = pd.DataFrame(
            {"site_code": pd.to_numeric(df.get("site_code"), errors="coerce"), tc: tkey}
        )
        tmp = tmp.dropna(subset=["site_code", tc]).copy()
        tmp["site_code"] = tmp["site_code"].astype(int)
        keys.append(tmp[["site_code", tc]])

        cols = [
            c for c in ["site_code", "City", "Longitude", "Latitude"] if c in df.columns
        ]
        if cols:
            cp = df[cols].copy()
            cp["site_code"] = pd.to_numeric(cp["site_code"], errors="coerce")
            cp = cp.dropna(subset=["site_code"]).copy()
            cp["site_code"] = cp["site_code"].astype(int)
            coords_parts.append(cp)

    if not keys or time_col is None:
        return None

    base = pd.concat(keys, ignore_index=True).drop_duplicates(
        subset=["site_code", time_col]
    )

    # One metadata row per site; first occurrence across RAT frames wins.
    coords = None
    if coords_parts:
        coords_all = pd.concat(coords_parts, ignore_index=True)
        coords_all = coords_all.drop_duplicates(subset=["site_code"])
        keep = [
            c
            for c in ["site_code", "City", "Longitude", "Latitude"]
            if c in coords_all.columns
        ]
        coords = coords_all[keep].copy() if keep else None

    if isinstance(coords, pd.DataFrame) and not coords.empty:
        base = pd.merge(base, coords, on="site_code", how="left")

    # Stable row identifier "<time>_<site>" used by downstream consumers.
    base["ID"] = base[time_col].astype(str) + "_" + base["site_code"].astype(str)

    # Columns treated as metadata; everything else is a KPI column.
    meta_cols = {
        "site_code",
        "period_start",
        "date_only",
        "Longitude",
        "Latitude",
        "City",
        "RAT",
        "ID",
    }

    # Pass 2: left-join each RAT's KPI columns onto the key base.
    out = base
    for rat in present:
        df = daily_by_rat.get(rat)
        if not isinstance(df, pd.DataFrame) or df.empty:
            continue

        nt = _normalize_time_key(df, granularity)
        if nt is None:
            continue
        tc, tkey = nt

        tmp = df.copy()
        tmp["site_code"] = pd.to_numeric(tmp.get("site_code"), errors="coerce")
        tmp = tmp.dropna(subset=["site_code"]).copy()
        tmp["site_code"] = tmp["site_code"].astype(int)
        tmp[tc] = tkey
        tmp = tmp.dropna(subset=[tc]).copy()

        kpi_cols = [c for c in tmp.columns if c not in meta_cols]
        keep_cols = ["site_code", tc] + kpi_cols
        tmp2 = tmp[keep_cols].copy()
        # Prefix KPI columns with the RAT so merged columns stay unique.
        rename = {c: f"{rat}_{c}" for c in kpi_cols}
        tmp2 = tmp2.rename(columns=rename)
        out = pd.merge(
            out,
            tmp2,
            left_on=["site_code", time_col],
            right_on=["site_code", tc],
            how="left",
        )
        # Drop the RAT's own time column when it differs from the base one.
        if tc != time_col and tc in out.columns:
            out = out.drop(columns=[tc], errors="ignore")

    # Put identifier/metadata columns first, KPI columns after.
    first_cols = [
        c
        for c in ["ID", time_col, "site_code", "City", "Longitude", "Latitude"]
        if c in out.columns
    ]
    rest = [c for c in out.columns if c not in first_cols]
    out = out[first_cols + rest]
    try:
        out = out.sort_values(by=[time_col, "site_code"], ascending=[True, True])
    except Exception:
        # Best effort: mixed/unsortable time keys leave the frame unsorted.
        pass

    return f"{prefix}_All", out
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def build_export_bytes(
    datasets_df: pd.DataFrame | None,
    rules_df: pd.DataFrame | None,
    summary_df: pd.DataFrame | None,
    status_df: pd.DataFrame | None,
    daily_by_rat: dict[str, pd.DataFrame] | None = None,
    granularity: str = "Daily",
    multirat_summary_df: pd.DataFrame | None = None,
    top_anomalies_df: pd.DataFrame | None = None,
    complaint_multirat_df: pd.DataFrame | None = None,
    complaint_top_anomalies_df: pd.DataFrame | None = None,
    ops_queue_df: pd.DataFrame | None = None,
    delta_df: pd.DataFrame | None = None,
    profile: dict | None = None,
) -> bytes:
    """Assemble the KPI health-check workbook and return it as Excel bytes.

    Sheet order: the four core sheets (Datasets, KPI_Rules, Site_Summary,
    Site_KPI_Status), then the per-granularity data sheets (a combined
    all-tech sheet when it can be built, otherwise one sheet per RAT,
    paginated to fit Excel's row limit), then the analysis sheets
    (MultiRAT_Summary .. Delta). Any argument that is not a DataFrame is
    exported as an empty sheet so the sheet layout stays stable.

    When *profile* is provided, timing/size metrics are recorded into it
    (export_prep_seconds, excel_total_seconds, sheet_count).
    """
    if profile is not None:
        profile["export_prep_seconds"] = 0.0
        profile["excel_total_seconds"] = 0.0

    t_prep0 = pd.Timestamp.utcnow() if profile is not None else None
    dfs = [
        datasets_df if isinstance(datasets_df, pd.DataFrame) else pd.DataFrame(),
        rules_df if isinstance(rules_df, pd.DataFrame) else pd.DataFrame(),
        summary_df if isinstance(summary_df, pd.DataFrame) else pd.DataFrame(),
        status_df if isinstance(status_df, pd.DataFrame) else pd.DataFrame(),
    ]

    sheet_names = [
        "Datasets",
        "KPI_Rules",
        "Site_Summary",
        "Site_KPI_Status",
    ]

    # Excel sheet limit is 1,048,576 rows; one is reserved for the header.
    max_data_rows = 1048575
    if daily_by_rat and isinstance(daily_by_rat, dict):
        g = str(granularity or "Daily").strip().lower()
        prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"

        combined = _build_all_tech_sheet(daily_by_rat, granularity)
        if combined is not None:
            base, df_all = combined
            if len(df_all) <= max_data_rows:
                dfs.append(df_all)
                sheet_names.append(base[:31])  # Excel caps names at 31 chars
            else:
                # Split oversized frames into "_pN" continuation sheets.
                part = 1
                for start in range(0, len(df_all), max_data_rows):
                    end = min(start + max_data_rows, len(df_all))
                    dfs.append(df_all.iloc[start:end].copy())
                    sheet_names.append(f"{base}_p{part}"[:31])
                    part += 1
        else:
            # Fallback: no combined sheet possible, export one sheet per RAT.
            for rat, df in daily_by_rat.items():
                if not isinstance(df, pd.DataFrame):
                    continue
                base = f"{prefix}_All_{str(rat)}"
                if len(df) <= max_data_rows:
                    dfs.append(df)
                    sheet_names.append(base[:31])
                else:
                    part = 1
                    for start in range(0, len(df), max_data_rows):
                        end = min(start + max_data_rows, len(df))
                        dfs.append(df.iloc[start:end].copy())
                        sheet_names.append(f"{base}_p{part}"[:31])
                        part += 1

    # Analysis sheets; missing inputs become empty sheets.
    dfs.extend(
        [
            (
                multirat_summary_df
                if isinstance(multirat_summary_df, pd.DataFrame)
                else pd.DataFrame()
            ),
            (
                top_anomalies_df
                if isinstance(top_anomalies_df, pd.DataFrame)
                else pd.DataFrame()
            ),
            (
                complaint_multirat_df
                if isinstance(complaint_multirat_df, pd.DataFrame)
                else pd.DataFrame()
            ),
            (
                complaint_top_anomalies_df
                if isinstance(complaint_top_anomalies_df, pd.DataFrame)
                else pd.DataFrame()
            ),
            ops_queue_df if isinstance(ops_queue_df, pd.DataFrame) else pd.DataFrame(),
            delta_df if isinstance(delta_df, pd.DataFrame) else pd.DataFrame(),
        ]
    )
    sheet_names.extend(
        [
            "MultiRAT_Summary",
            "Top_Anomalies",
            "Complaint_MultiRAT",
            "Complaint_Top_Anomalies",
            "Ops_Queue",
            "Delta",
        ]
    )

    if profile is not None:
        t_prep1 = pd.Timestamp.utcnow()
        if t_prep0 is not None:
            profile["export_prep_seconds"] = float((t_prep1 - t_prep0).total_seconds())
        profile["sheet_count"] = int(len(sheet_names))

    return write_dfs_to_excel(dfs, sheet_names, index=False, profile=profile)
|
process_kpi/kpi_health_check/io.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import zipfile
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def read_bytes_to_df(file_bytes: bytes, filename: str) -> pd.DataFrame:
    """Decode an uploaded payload into a DataFrame.

    Supports a single semicolon-separated latin-1 CSV, or a ZIP archive of
    such CSVs (unreadable members are skipped; readable ones are
    concatenated). Raises ValueError for empty input, unsupported
    extensions, or archives with no usable CSV content.
    """
    if not file_bytes:
        raise ValueError("Empty file")

    lowered = (filename or "").lower()
    buffer = io.BytesIO(file_bytes)

    if lowered.endswith(".zip"):
        with zipfile.ZipFile(buffer) as archive:
            members = [n for n in archive.namelist() if n.lower().endswith(".csv")]
            if not members:
                raise ValueError("No CSV file found in the ZIP archive")

            frames = []
            for member in members:
                try:
                    with archive.open(member) as handle:
                        frame = pd.read_csv(
                            handle,
                            encoding="latin1",
                            sep=";",
                            low_memory=False,
                        )
                    if isinstance(frame, pd.DataFrame) and not frame.empty:
                        frames.append(frame)
                except Exception:
                    # Best effort: skip members that fail to parse.
                    continue

            if not frames:
                raise ValueError("No readable CSV content found in the ZIP archive")
            if len(frames) == 1:
                return frames[0]
            return pd.concat(frames, ignore_index=True, sort=False)

    if lowered.endswith(".csv"):
        return pd.read_csv(buffer, encoding="latin1", sep=";", low_memory=False)

    raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
|
process_kpi/kpi_health_check/kpi_groups.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
# Regex patterns for KPI classification.
# Order matters: classify_kpi() returns the FIRST group whose pattern
# matches, so more specific groups (Transmission, Interference, Mobility)
# are listed before the broad ones (Success Rate, Fails/Drop/Block, ...).
PATTERNS = {
    # Refined patterns based on user data
    "Transmission": r"(?i)(abis|a-?bis|iub|x2|backhaul|transport|transmission|s1(?:\s|_)*sig(?:\s|_)*conn(?:\s|_)*sr)",
    "Interference": r"(?i)(\brtwp\b|avg(?:\s|_)*rtwp|rtwp(?:\s|_)*rx(?:\s|_)*ant)",
    # Lookarounds keep a bare "ho" token from matching inside other words.
    "Mobility": r"(?i)(handover|(?<![A-Za-z0-9])ho(?![A-Za-z0-9])|soft(?:\s|_|-)*ho|intra(?:\s|_|-)*freq(?:\s|_|-)*ho|inter(?:\s|_|-)*freq(?:\s|_|-)*ho|csfb)",
    "Success Rate": r"(?i)(cssr|success|attach|setup|establ|answer|complete|connected|ho.*succ|\berab\b|\brrc\b.*(?:\bsr\b|rate|succ)|\basr\b|\bsr\b)",
    "Fails/Drop/Block": r"(?i)(drop|dcr|fail|block|reject|deny|loss|lost|discard|congestion|accessibility.*fail|retention.*fail)",
    "Throughput": r"(?i)(throughput|thp|thrput|PDCP|debit|dl.*rate|ul.*rate|bitrate)",
    "Traffic": r"(?i)(traffic|volume|erl|payload|gbytes|gb|load|usage|utilization)",
    "Availability": r"(?i)(availability|avail|unavailability|unavail|dispo|disponibil|uptime)",
    "Latency": r"(?i)(latency|delay|\brt\b|rtt)",
}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def classify_kpi(kpi_name: str) -> str:
    """Map a KPI name to its classification group.

    The first PATTERNS entry whose regex matches wins; names that match
    no pattern are classified as 'Other'.
    """
    name = str(kpi_name)
    return next(
        (group for group, pattern in PATTERNS.items() if re.search(pattern, name)),
        "Other",
    )
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def get_kpis_by_group(all_kpis: list[str]) -> dict[str, list[str]]:
    """Bucket KPI names by classification group, dropping empty buckets.

    KPIs are sorted alphabetically before bucketing, so each group's list
    is sorted too.
    """
    buckets: dict[str, list[str]] = {name: [] for name in PATTERNS}
    buckets["Other"] = []

    for kpi in sorted(all_kpis):
        buckets[classify_kpi(kpi)].append(kpi)

    # Only groups that actually received at least one KPI are reported.
    return {name: kpis for name, kpis in buckets.items() if kpis}
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def filter_kpis(
    all_kpis: list[str],
    group: str,
    mode: str = "Filter",
    top_n: int = 12,
    stats_df: pd.DataFrame | None = None,
) -> list[str]:
    """
    Filters KPIs based on the selected group and mode.

    Args:
        all_kpis: List of available KPI names.
        group: Selected group name (or 'All (selected KPIs)'). UI labels may
            carry extra text (e.g. "Success Rate (>= SLA...)"); they are
            matched by PATTERNS-key substring rather than exact equality.
        mode: 'Filter' or 'Top-N'. Currently unused; kept for interface
            compatibility with the UI.
        top_n: Max KPIs to return if filtering needs truncation or specific
            selection. Currently unused.
        stats_df: Optional DataFrame with 'site_code', 'KPI', 'is_bad', etc.
            intended for smart sorting. Currently unused (V1 returns
            alphabetical order).

    Returns:
        Alphabetically sorted KPI names belonging to the requested group,
        or all KPIs when *group* is empty or 'All (selected KPIs)'.
    """
    if not all_kpis:
        return []

    # 1. Filter by group. Match by PATTERNS-key substring so decorated UI
    # labels still resolve to the right group.
    if group and group != "All (selected KPIs)":
        target_group = "Other"
        for key in PATTERNS:
            if key in group:
                target_group = key
                break
        if "Other" in group:
            # An explicit "Other" label always wins, even if a key matched.
            target_group = "Other"

        candidates = [k for k in all_kpis if classify_kpi(k) == target_group]
    else:
        candidates = list(all_kpis)

    if not candidates:
        return []

    # 2. Sort/limit. TODO: smart sorting by *stats_df* "badness"/variance;
    # V1 returns a plain alphabetical ordering.
    return sorted(candidates)
|
process_kpi/kpi_health_check/multi_rat.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from process_kpi.kpi_health_check.kpi_groups import classify_kpi
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _slug(value: str) -> str:
|
| 7 |
+
s = str(value or "").strip().lower()
|
| 8 |
+
out = []
|
| 9 |
+
prev_underscore = False
|
| 10 |
+
for ch in s:
|
| 11 |
+
if ch.isalnum():
|
| 12 |
+
out.append(ch)
|
| 13 |
+
prev_underscore = False
|
| 14 |
+
else:
|
| 15 |
+
if not prev_underscore:
|
| 16 |
+
out.append("_")
|
| 17 |
+
prev_underscore = True
|
| 18 |
+
return "".join(out).strip("_")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _fmt_num(value) -> str:
|
| 22 |
+
try:
|
| 23 |
+
v = pd.to_numeric(value, errors="coerce")
|
| 24 |
+
if pd.isna(v):
|
| 25 |
+
return "NA"
|
| 26 |
+
return f"{float(v):.3g}"
|
| 27 |
+
except Exception: # noqa: BLE001
|
| 28 |
+
return "NA"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _build_rca_tags(row: dict) -> str:
    """Build a comma-separated tag string summarising one anomaly row.

    Tags encode (in order): the slugified RCA group, the degradation
    status, missing baseline/recent medians, and a multi_rat flag when
    two or more RATs are impacted.
    """
    group = str(row.get("rca_group") or "Other")
    tags: list[str] = [_slug(group) if group else "other"]

    status = str(row.get("status") or "").strip().upper()
    if status == "PERSISTENT_DEGRADED":
        tags.append("persistent")
    elif status == "DEGRADED":
        tags.append("degraded")
    elif status:
        tags.append(_slug(status))

    # Flag rows whose medians could not be computed.
    if pd.isna(pd.to_numeric(row.get("baseline_median"), errors="coerce")):
        tags.append("missing_baseline")
    if pd.isna(pd.to_numeric(row.get("recent_median"), errors="coerce")):
        tags.append("missing_recent")

    impacted = pd.to_numeric(row.get("impacted_rats"), errors="coerce")
    if pd.notna(impacted) and float(impacted) >= 2:
        tags.append("multi_rat")

    return ",".join(tag for tag in tags if tag)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _build_rca_hint(row: dict) -> str:
    """Render a compact one-line RCA summary for an anomaly row.

    Format: "<group> | <RAT> | <KPI> | <status> | baseline=<b> recent=<r> |
    streak=<n>d bad=<n>d". Missing fields degrade gracefully ("Other", "",
    "NA", or 0) instead of raising.
    """
    group = str(row.get("rca_group") or "Other")
    kpi = str(row.get("KPI") or "")
    rat = str(row.get("RAT") or "")
    status = str(row.get("status") or "")
    baseline_s = _fmt_num(row.get("baseline_median"))
    recent_s = _fmt_num(row.get("recent_median"))
    # BUG FIX: `int(pd.to_numeric(...) or 0)` did not guard against NaN —
    # NaN is truthy, so missing values made int() raise ValueError. Treat
    # NaN as 0 explicitly.
    streak_v = pd.to_numeric(row.get("max_streak_recent"), errors="coerce")
    bad_v = pd.to_numeric(row.get("bad_days_recent"), errors="coerce")
    streak = 0 if pd.isna(streak_v) else int(streak_v)
    bad = 0 if pd.isna(bad_v) else int(bad_v)
    return (
        f"{group} | {rat} | {kpi} | {status} | "
        f"baseline={baseline_s} recent={recent_s} | streak={streak}d bad={bad}d"
    )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def compute_multirat_views(
    status_df: pd.DataFrame,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Derive per-site multi-RAT views from the per-(site, KPI) status table.

    Returns (summary, top):
      summary -- one row per site: degraded/persistent/resolved KPI counts
                 (totals plus per-RAT pivot columns), number of impacted
                 RATs, and a weighted ``criticality_score``, sorted by
                 criticality descending.
      top     -- up to 300 degraded rows ranked by ``anomaly_score`` and
                 enriched with RCA group/tags/hint columns.

    Both are empty DataFrames when *status_df* is missing or empty.
    """
    if status_df is None or status_df.empty:
        return pd.DataFrame(), pd.DataFrame()

    df = status_df.copy()
    # Boolean helper columns derived from the status label.
    df["is_degraded"] = df["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"])
    df["is_persistent"] = df["status"].isin(["PERSISTENT_DEGRADED"])
    df["is_resolved"] = df["status"].isin(["RESOLVED"])

    def _first_city(s: pd.Series):
        # First non-null City value for the site (rows may disagree/miss it).
        s2 = s.dropna()
        return s2.iloc[0] if not s2.empty else None

    # Per-site totals across all RATs.
    base = (
        df.groupby("site_code", as_index=False)
        .agg(
            City=("City", _first_city),
            degraded_kpis_total=("is_degraded", "sum"),
            persistent_kpis_total=("is_persistent", "sum"),
            resolved_kpis_total=("is_resolved", "sum"),
        )
        .copy()
    )

    # Number of distinct RATs with at least one degraded KPI per site.
    impacted = (
        df[df["is_degraded"]]
        .groupby("site_code")["RAT"]
        .nunique()
        .rename("impacted_rats")
        .reset_index()
    )

    # Per-RAT resolved KPI counts, as resolved_<RAT> columns.
    resolved_pivot = (
        df[df["is_resolved"]]
        .pivot_table(
            index="site_code",
            columns="RAT",
            values="KPI",
            aggfunc="count",
            fill_value=0,
        )
        .rename(columns=lambda c: f"resolved_{c}")
        .reset_index()
    )

    base = pd.merge(base, impacted, on="site_code", how="left")
    base["impacted_rats"] = base["impacted_rats"].fillna(0).astype(int)

    # Per-RAT degraded KPI counts, as degraded_<RAT> columns.
    degraded_pivot = (
        df[df["is_degraded"]]
        .pivot_table(
            index="site_code",
            columns="RAT",
            values="KPI",
            aggfunc="count",
            fill_value=0,
        )
        .rename(columns=lambda c: f"degraded_{c}")
        .reset_index()
    )

    # Per-RAT persistent KPI counts, as persistent_<RAT> columns.
    persistent_pivot = (
        df[df["is_persistent"]]
        .pivot_table(
            index="site_code",
            columns="RAT",
            values="KPI",
            aggfunc="count",
            fill_value=0,
        )
        .rename(columns=lambda c: f"persistent_{c}")
        .reset_index()
    )

    out = base
    if not degraded_pivot.empty:
        out = pd.merge(out, degraded_pivot, on="site_code", how="left")
    if not persistent_pivot.empty:
        out = pd.merge(out, persistent_pivot, on="site_code", how="left")
    if not resolved_pivot.empty:
        out = pd.merge(out, resolved_pivot, on="site_code", how="left")

    # Sites absent from a pivot get 0 instead of NaN for all metric columns.
    metric_cols = [c for c in out.columns if c != "City"]
    out[metric_cols] = out[metric_cols].fillna(0)

    resolved_total = (
        out["resolved_kpis_total"].astype(float)
        if "resolved_kpis_total" in out.columns
        else 0.0
    )
    # Weighted severity: persistent x5, degraded x2, impacted RATs x1,
    # resolved x0.5 (recent recoveries still add a little weight).
    out["criticality_score"] = (
        (
            out["persistent_kpis_total"].astype(float) * 5.0
            + out["degraded_kpis_total"].astype(float) * 2.0
            + out["impacted_rats"].astype(float) * 1.0
            + resolved_total * 0.5
        )
        .round(0)
        .astype(int)
    )

    out = out.sort_values(
        by=[
            "criticality_score",
            "persistent_kpis_total",
            "degraded_kpis_total",
            "impacted_rats",
        ],
        ascending=[False, False, False, False],
    )

    # --- Top anomalies view: one row per degraded (site, KPI) pair. ---
    top = df[df["is_degraded"]].copy()
    sev = {"PERSISTENT_DEGRADED": 2, "DEGRADED": 1}
    top["severity"] = top["status"].map(sev).fillna(0).astype(int)

    # Ensure scoring columns exist even when the source lacks them.
    for col in ["bad_days_recent", "max_streak_recent"]:
        if col not in top.columns:
            top[col] = pd.NA

    # Score: severity dominates (x100), then streak length (x10), then
    # total bad days.
    top["anomaly_score"] = (
        (
            top["severity"].astype(float) * 100.0
            + pd.to_numeric(top["max_streak_recent"], errors="coerce")
            .fillna(0)
            .astype(float)
            * 10.0
            + pd.to_numeric(top["bad_days_recent"], errors="coerce")
            .fillna(0)
            .astype(float)
        )
        .round(0)
        .astype(int)
    )

    top = top.sort_values(
        by=["anomaly_score", "severity", "max_streak_recent", "bad_days_recent"],
        ascending=[False, False, False, False],
    )

    try:
        top = pd.merge(top, impacted, on="site_code", how="left")
        top["impacted_rats"] = (
            pd.to_numeric(top["impacted_rats"], errors="coerce").fillna(0).astype(int)
        )
    except Exception:  # noqa: BLE001
        # Best effort: default to 0 impacted RATs if the merge fails.
        top["impacted_rats"] = 0

    # RCA enrichment; hint/tag failures degrade to empty strings.
    top["rca_group"] = top["KPI"].apply(classify_kpi)
    try:
        top["rca_hint"] = top.apply(lambda r: _build_rca_hint(r.to_dict()), axis=1)
        top["rca_tags"] = top.apply(lambda r: _build_rca_tags(r.to_dict()), axis=1)
    except Exception:  # noqa: BLE001
        top["rca_hint"] = ""
        top["rca_tags"] = ""

    top_cols = [
        c
        for c in [
            "anomaly_score",
            "severity",
            "RAT",
            "site_code",
            "City",
            "KPI",
            "rca_group",
            "rca_tags",
            "rca_hint",
            "status",
            "impacted_rats",
            "baseline_median",
            "recent_median",
            "bad_days_recent",
            "max_streak_recent",
        ]
        if c in top.columns
    ]
    # Cap the export/display size at the 300 highest-scoring anomalies.
    top = top[top_cols].head(300)

    return out, top
|
process_kpi/kpi_health_check/normalization.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
from utils.utils_vars import get_physical_db
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def to_numeric(series: pd.Series) -> pd.Series:
    """Coerce a possibly locale-formatted string series to numbers.

    Handles non-breaking spaces, plain spaces, percent signs, and both
    European ("1.234,56") and US ("1,234.56") thousands/decimal
    conventions. Unparseable values become NaN.
    """
    # Already numeric: nothing to clean.
    if pd.api.types.is_numeric_dtype(series):
        return pd.to_numeric(series, errors="coerce")
    s = series.astype(str)
    # Strip non-breaking spaces, ordinary spaces, and percent signs.
    s = s.str.replace("\u00a0", "", regex=False)
    s = s.str.replace(" ", "", regex=False)
    s = s.str.replace("%", "", regex=False)
    # astype(str) turned missing values into literal "nan"/"None"; map them
    # (and empty strings) back to NaN.
    s = s.replace({"nan": np.nan, "None": np.nan, "": np.nan})

    has_comma = s.str.contains(",", na=False, regex=False)
    has_dot = s.str.contains(".", na=False, regex=False)
    both = has_comma & has_dot
    if bool(both.any()):
        # Values with BOTH separators: whichever appears LAST is the
        # decimal separator; the other is a thousands separator.
        last_comma = s.str.rfind(",")
        last_dot = s.str.rfind(".")
        euro = both & (last_comma > last_dot)  # e.g. "1.234,56"
        us = both & (last_dot > last_comma)  # e.g. "1,234.56"
        if bool(euro.any()):
            s.loc[euro] = (
                s.loc[euro]
                .str.replace(".", "", regex=False)
                .str.replace(",", ".", regex=False)
            )
        if bool(us.any()):
            s.loc[us] = s.loc[us].str.replace(",", "", regex=False)

    # Comma-only values ("12,5") are treated as comma-decimal.
    comma_only = has_comma & ~has_dot
    if bool(comma_only.any()):
        s.loc[comma_only] = s.loc[comma_only].str.replace(",", ".", regex=False)
    return pd.to_numeric(s, errors="coerce")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def parse_datetime(series: pd.Series) -> pd.Series:
    """Parse a date/datetime column, trying several explicit formats.

    The first format that parses at least one value wins, which keeps the
    whole column on one format instead of pandas guessing row by row.
    Values a chosen format cannot parse become NaT.

    Args:
        series: raw date strings (or anything `pd.to_datetime` accepts).

    Returns:
        A datetime64 series; unparseable entries are NaT.
    """
    if series.empty:
        return pd.to_datetime(series, errors="coerce")

    non_null = series.dropna()
    # Use emptiness, not `.any()`: element truthiness would treat a first
    # value of "" or 0 as missing and wrongly skip datetime-format probing.
    first = str(non_null.iloc[0]) if not non_null.empty else ""

    formats: list[str] = []
    if len(first) > 10:  # longer than "YYYY-MM-DD" -> probably has a time part
        formats.extend(
            [
                "%m.%d.%Y %H:%M:%S",
                "%d.%m.%Y %H:%M:%S",
                "%Y-%m-%d %H:%M:%S",
                "%Y/%m/%d %H:%M:%S",
                "%d/%m/%Y %H:%M:%S",
                "%m/%d/%Y %H:%M:%S",
            ]
        )
    formats.extend(
        [
            "%m.%d.%Y",
            "%d.%m.%Y",
            "%Y-%m-%d",
            "%Y/%m/%d",
            "%d/%m/%Y",
            "%m/%d/%Y",
        ]
    )

    for fmt in formats:
        parsed = pd.to_datetime(series, errors="coerce", format=fmt)
        if parsed.notna().any():
            return parsed

    # No explicit format matched anything: let pandas infer per value.
    return pd.to_datetime(series, errors="coerce")
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def extract_site_code(value: object) -> int | None:
|
| 78 |
+
if value is None or (isinstance(value, float) and np.isnan(value)):
|
| 79 |
+
return None
|
| 80 |
+
s = str(value)
|
| 81 |
+
|
| 82 |
+
# Prefer explicit node patterns when present (DN strings often contain multiple numbers).
|
| 83 |
+
for pat in [
|
| 84 |
+
r"(?:WBTS)\D*(\d{3,7})",
|
| 85 |
+
r"(?:LNBTS)\D*(\d{3,7})",
|
| 86 |
+
r"(?:BTS)\D*(\d{3,7})",
|
| 87 |
+
r"(?:BCF)\D*(\d{3,7})",
|
| 88 |
+
r"(?:MRBTS)\D*(\d{3,7})",
|
| 89 |
+
r"(?:SBTS)\D*(\d{3,7})",
|
| 90 |
+
]:
|
| 91 |
+
m = re.search(pat, s, flags=re.IGNORECASE)
|
| 92 |
+
if m:
|
| 93 |
+
try:
|
| 94 |
+
return int(m.group(1))
|
| 95 |
+
except ValueError:
|
| 96 |
+
pass
|
| 97 |
+
|
| 98 |
+
# Fallback: accept 3-digit sites (common) while keeping the upper bound used previously.
|
| 99 |
+
m = re.search(r"(\d{3,7})", s)
|
| 100 |
+
if not m:
|
| 101 |
+
return None
|
| 102 |
+
try:
|
| 103 |
+
return int(m.group(1))
|
| 104 |
+
except ValueError:
|
| 105 |
+
return None
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def infer_date_col(df: pd.DataFrame) -> str:
    """Return the first recognised date column present in *df*.

    Raises:
        ValueError: when none of the known date column names is present.
    """
    known_names = ("PERIOD_START_TIME", "PERIOD_START_DATE", "date", "Date", "DATE")
    found = next((name for name in known_names if name in df.columns), None)
    if found is None:
        raise ValueError("Cannot find a date column (expected PERIOD_START_TIME)")
    return found
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def infer_id_col(df: pd.DataFrame, rat: str) -> str:
    """Pick the column most likely to hold the site/entity identifier for *rat*.

    Candidates are tried in RAT-specific preference order. When the physical
    site database is available, each candidate is scored on a sample by how
    many of its extracted site codes match known sites.

    Raises:
        ValueError: when no candidate column exists in *df*.
    """
    # Preference-ordered identifier columns per radio access technology.
    rat_candidates = {
        "2G": ["BCF name", "BCF", "BTS name", "BSC name", "DN"],
        "3G": ["WBTS name", "WBTS ID", "DN"],
        "LTE": ["LNBTS name", "MRBTS/SBTS name", "DN"],
        "TWAMP": ["MRBTS name", "MRBTS/SBTS name", "LNBTS name", "DN"],
    }

    candidates = [c for c in rat_candidates.get(rat, []) if c in df.columns]
    if not candidates and "DN" in df.columns:
        candidates = ["DN"]
    if not candidates:
        raise ValueError(f"Cannot infer an entity/site column for {rat} dataset")

    # Known site codes from the physical DB; best-effort — None disables scoring.
    physical_codes: set[int] | None = None
    try:
        physical = load_physical_db()
        if not physical.empty and "code" in physical.columns:
            physical_codes = set(
                pd.to_numeric(physical["code"], errors="coerce")
                .dropna()
                .astype(int)
                .tolist()
            )
    except Exception:
        physical_codes = None

    if not physical_codes:
        return candidates[0]

    # Score each candidate on a 2000-row sample: the fraction of codes that
    # match the physical DB dominates (x10); extraction success breaks ties.
    best_col = candidates[0]
    best_score = -1.0
    for c in candidates:
        sample = df[c].head(2000)
        codes = sample.apply(extract_site_code)
        non_null = float(codes.notna().mean()) if len(codes) else 0.0

        if physical_codes:
            match = (
                float(codes.dropna().astype(int).isin(physical_codes).mean())
                if codes.notna().any()
                else 0.0
            )
            score = match * 10.0 + non_null
        else:
            score = non_null

        if score > best_score:
            best_score = score
            best_col = c

    return best_col
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def non_kpi_identifier_cols(df: pd.DataFrame, rat: str) -> set[str]:
    """Columns of *df* that identify network elements rather than carry KPIs."""
    always = {
        "DN",
        "PLMN name",
        "RNC name",
        "BSC name",
        "BCF name",
        "MRBTS name",
        "MRBTS/SBTS name",
        "LNBTS name",
        "WBTS name",
        "WBTS ID",
    }
    per_rat = {
        "2G": {"BSC name", "BSC", "BCF name", "BCF", "BTS name"},
        "3G": {"PLMN name", "RNC name", "WBTS name", "WBTS ID"},
        "LTE": {"MRBTS/SBTS name", "LNBTS name"},
    }
    wanted = always | per_rat.get(rat, set())
    return {name for name in wanted if name in df.columns}
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def infer_agg(kpi: str) -> str:
    """Choose the aggregation for a KPI: 'sum' for volume-like names, else 'mean'."""
    lowered = str(kpi).lower()
    volume_markers = ("traffic", "volume", "erl", "total", "gbytes", "gb")
    return "sum" if any(marker in lowered for marker in volume_markers) else "mean"
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _is_availability_kpi(kpi: str) -> bool:
    """True when the KPI name looks like an (un)availability / uptime metric."""
    name = str(kpi).strip().lower()
    if not name:
        return False
    availability_markers = (
        "availability",
        "avail",
        "unavailability",
        "unavail",
        "dispo",
        "disponibil",
        "uptime",
    )
    return any(marker in name for marker in availability_markers)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def load_physical_db() -> pd.DataFrame:
    """Load the physical site database keyed by integer site code.

    Derives `code` from the leading segment of `Code_Sector` (before '_'),
    drops rows without a numeric code, and keeps one row per code with the
    location columns that exist.
    """
    physical_db = get_physical_db().copy()
    # "1234_A" -> "1234"; non-numeric prefixes become NaN and are dropped.
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    physical_db["code"] = pd.to_numeric(physical_db["code"], errors="coerce")
    physical_db = physical_db.dropna(subset=["code"])
    physical_db["code"] = physical_db["code"].astype(int)
    keep = [
        c for c in ["code", "Longitude", "Latitude", "City"] if c in physical_db.columns
    ]
    return physical_db[keep].drop_duplicates("code")
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def build_period_kpi(
    df_raw: pd.DataFrame,
    rat: str,
    granularity: str = "Daily",
) -> tuple[pd.DataFrame, list[str]]:
    """Aggregate a raw KPI export to (site_code, period) granularity.

    Args:
        df_raw: raw export containing a date column, an entity/DN column and
            KPI columns (string formats accepted; see `to_numeric`).
        rat: technology label ("2G"/"3G"/"LTE"/"TWAMP"), also stamped on the
            output as the `RAT` column.
        granularity: "Daily" (default) or anything starting with "h"/"hour"
            for hourly buckets.

    Returns:
        `(out, kpi_cols)` where `out` has one row per (site_code, period_start)
        plus `date_only`, optional location columns, and `RAT`; `kpi_cols`
        names the numeric KPI columns that survived detection.

    Raises:
        ValueError: when no date/id column or no numeric KPI column is found.
    """
    df = df_raw.copy()
    date_col = infer_date_col(df)
    id_col = infer_id_col(df, rat)

    df["date"] = parse_datetime(df[date_col])
    df = df.dropna(subset=["date"])

    # "Hourly"/"h..." -> hour buckets; anything else -> day buckets.
    g = str(granularity or "Daily").strip().lower()
    if g.startswith("hour") or g.startswith("h"):
        df["period_start"] = df["date"].dt.floor("h")
    else:
        df["period_start"] = df["date"].dt.floor("D")

    df["site_code"] = df[id_col].apply(extract_site_code)
    df = df.dropna(subset=["site_code"])
    df["site_code"] = df["site_code"].astype(int)

    # Everything that is not metadata/identifier is a KPI candidate.
    meta = {date_col, id_col, "date", "site_code", "period_start"}
    meta = meta.union(non_kpi_identifier_cols(df, rat))
    candidate_cols = [c for c in df.columns if c not in meta]

    numeric_cols: dict[str, pd.Series] = {}
    for c in candidate_cols:
        numeric_cols[c] = to_numeric(df[c])

    numeric_df = pd.DataFrame(numeric_cols)
    # Missing availability samples are treated as 0 (fully unavailable).
    for c in list(numeric_df.columns):
        if _is_availability_kpi(c):
            numeric_df[c] = numeric_df[c].fillna(0.0)
    # Keep only columns where at least one value parsed as numeric.
    kpi_cols = [c for c in numeric_df.columns if numeric_df[c].notna().any()]
    if not kpi_cols:
        raise ValueError(f"No numeric KPI columns detected for {rat}")

    base = pd.concat(
        [
            df[["site_code", "period_start"]].reset_index(drop=True),
            numeric_df[kpi_cols].reset_index(drop=True),
        ],
        axis=1,
    )

    # Volume-like KPIs are summed, quality KPIs averaged (see infer_agg).
    agg_dict = {k: infer_agg(k) for k in kpi_cols}
    out = base.groupby(["site_code", "period_start"], as_index=False).agg(agg_dict)
    out["date_only"] = pd.to_datetime(out["period_start"]).dt.date

    # Enrich with site coordinates/city when the physical DB is available.
    physical = load_physical_db()
    if not physical.empty:
        out = pd.merge(out, physical, left_on="site_code", right_on="code", how="left")
        out = out.drop(columns=[c for c in ["code"] if c in out.columns])

    out["RAT"] = rat

    return out, kpi_cols
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
    """Backward-compatible wrapper: `build_period_kpi` at daily granularity."""
    return build_period_kpi(df_raw, rat, granularity="Daily")
|
process_kpi/kpi_health_check/presets.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
import pandas as pd
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def presets_dir() -> str:
    """Absolute path of <project-root>/data/kpi_health_check_presets."""
    module_path = os.path.abspath(__file__)
    # Three levels up from this module: package -> process_kpi -> project root.
    project_root = os.path.dirname(os.path.dirname(os.path.dirname(module_path)))
    return os.path.join(project_root, "data", "kpi_health_check_presets")
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _safe_name(name: str) -> str:
    """Sanitize a user-supplied preset name into a filename-safe stem."""
    text = (name or "").strip()
    # Drop ".." first so path-traversal sequences cannot survive sanitization.
    text = text.replace("..", "")
    text = text.replace("/", "_").replace("\\", "_")
    return "_".join(text.split())
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def list_presets() -> list[str]:
    """Sorted names (without extension) of all saved preset JSON files."""
    directory = presets_dir()
    if not os.path.isdir(directory):
        return []
    names = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(directory)
        if entry.lower().endswith(".json")
    }
    return sorted(names)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def load_preset(name: str) -> pd.DataFrame:
    """Load a saved preset as a DataFrame of rules.

    Raises:
        FileNotFoundError: when no preset file exists for *name*.
    """
    d = presets_dir()
    safe = _safe_name(name)
    path = os.path.join(d, f"{safe}.json")
    with open(path, "r", encoding="utf-8") as f:
        obj = json.load(f)
    # Only the dict form {"rules": [...]} is supported; anything else -> empty.
    rows = obj.get("rules", []) if isinstance(obj, dict) else []
    df = pd.DataFrame(rows)
    if not df.empty:
        # Normalize key columns to strings for stable downstream matching.
        df["RAT"] = df["RAT"].astype(str)
        df["KPI"] = df["KPI"].astype(str)
    return df
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def save_preset(name: str, rules_df: pd.DataFrame) -> str:
    """Persist the rules DataFrame as a preset JSON file.

    Args:
        name: user-supplied preset name (sanitized for the filename).
        rules_df: rules table; only the known rule columns are kept.

    Returns:
        Path of the written file.

    Raises:
        ValueError: when the name sanitizes to empty or the rules are empty.
    """
    safe = _safe_name(name)
    if not safe:
        raise ValueError("Preset name is empty")

    d = presets_dir()
    os.makedirs(d, exist_ok=True)
    path = os.path.join(d, f"{safe}.json")

    df = rules_df.copy() if isinstance(rules_df, pd.DataFrame) else pd.DataFrame()
    if df.empty:
        raise ValueError("Rules dataframe is empty")

    # Persist only the rule fields the loader understands.
    keep = [c for c in ["RAT", "KPI", "direction", "sla", "policy"] if c in df.columns]
    df = df[keep].copy()

    obj = {
        "name": safe,
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) when next touching this.
        "saved_at": datetime.utcnow().isoformat() + "Z",
        "rules": df.to_dict(orient="records"),
    }

    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)

    return path
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def delete_preset(name: str) -> None:
    """Remove the preset file for *name*; no-op when it does not exist."""
    target = os.path.join(presets_dir(), f"{_safe_name(name)}.json")
    if os.path.isfile(target):
        os.remove(target)
|
process_kpi/kpi_health_check/profiles.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def profiles_dir() -> str:
    """Absolute path of <project-root>/data/kpi_health_check_profiles."""
    module_path = os.path.abspath(__file__)
    # Three levels up from this module: package -> process_kpi -> project root.
    project_root = os.path.dirname(os.path.dirname(os.path.dirname(module_path)))
    return os.path.join(project_root, "data", "kpi_health_check_profiles")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _safe_name(name: str) -> str:
    """Sanitize a user-supplied profile name into a filename-safe stem.

    Strips ".." (path traversal), turns path separators into "_", and joins
    whitespace-separated parts with "_".

    NOTE(review): duplicated in presets.py — candidate for a shared helper.
    """
    s = (name or "").strip()
    s = s.replace("..", "")
    s = s.replace("/", "_").replace("\\", "_")
    s = "_".join([p for p in s.split() if p])
    return s
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def list_profiles() -> list[str]:
    """Sorted names (without extension) of all saved profile JSON files."""
    directory = profiles_dir()
    if not os.path.isdir(directory):
        return []
    found = {
        os.path.splitext(entry)[0]
        for entry in os.listdir(directory)
        if entry.lower().endswith(".json")
    }
    return sorted(found)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def load_profile(name: str) -> dict:
    """Read a saved profile and return its config dict.

    Prefers the inner `{"config": {...}}` payload; falls back to the whole
    object when it is a dict, and to {} for any other JSON shape.

    Raises:
        FileNotFoundError: when no profile file exists for *name*.
    """
    path = os.path.join(profiles_dir(), f"{_safe_name(name)}.json")
    with open(path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)
    if not isinstance(payload, dict):
        return {}
    config = payload.get("config")
    if isinstance(config, dict):
        return config
    return payload
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def save_profile(name: str, config: dict) -> str:
    """Persist *config* as a profile JSON file.

    Args:
        name: user-supplied profile name (sanitized for the filename).
        config: non-empty configuration mapping to store.

    Returns:
        Path of the written file.

    Raises:
        ValueError: when the name sanitizes to empty or config is empty/not a dict.
    """
    safe = _safe_name(name)
    if not safe:
        raise ValueError("Profile name is empty")
    if config is None or not isinstance(config, dict) or not config:
        raise ValueError("Profile config is empty")

    d = profiles_dir()
    os.makedirs(d, exist_ok=True)
    path = os.path.join(d, f"{safe}.json")

    obj = {
        "name": safe,
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # consider datetime.now(timezone.utc) when next touching this.
        "saved_at": datetime.utcnow().isoformat() + "Z",
        "config": config,
    }

    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)

    return path
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def delete_profile(name: str) -> None:
    """Remove the profile file for *name*; silently does nothing when absent."""
    target = os.path.join(profiles_dir(), f"{_safe_name(name)}.json")
    if os.path.isfile(target):
        os.remove(target)
|
process_kpi/kpi_health_check/rules.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import re
|
| 4 |
+
from functools import lru_cache
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _norm(value: str) -> str:
    """Lowercase *value* and collapse runs of non-alphanumerics to single spaces."""
    text = str(value or "").strip().lower()
    text = re.sub(r"[^0-9a-z]+", " ", text)
    return re.sub(r"\s+", " ", text).strip()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _project_root() -> str:
    """Absolute path two directories above this module (the repository root)."""
    here = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(here, "..", ".."))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def _load_curated_rows() -> list[dict]:
    """Read curated rule rows from the first preset file that exists.

    Best-effort: returns [] on any read/parse failure. Bare `NaN` tokens
    (invalid JSON emitted by some exporters) are patched to `null` before
    parsing.
    """
    base_dir = os.path.join(_project_root(), "data", "kpi_health_check_presets")
    candidates = [
        os.path.join(base_dir, "presets_1.json"),
        os.path.join(base_dir, "profil_1.json"),
    ]

    path = next((p for p in candidates if os.path.exists(p)), None)
    if not path:
        return []

    try:
        with open(path, "r", encoding="utf-8") as f:
            txt = f.read()
        # Repair non-standard NaN literals so json.loads accepts the file.
        txt = re.sub(r"\bNaN\b", "null", txt)
        obj = json.loads(txt)
        rows = obj.get("rules", []) if isinstance(obj, dict) else []
        return rows if isinstance(rows, list) else []
    except Exception:  # noqa: BLE001
        return []
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@lru_cache(maxsize=1)
def _curated_rules_map() -> dict[tuple[str, str], dict]:
    """Build (and cache) the curated-rules index keyed by normalized (RAT, KPI).

    Each value carries `direction`, `sla` (float) and `policy`; fields that
    are missing or invalid in the source rows are stored as None so callers
    can fall back to heuristics.
    """
    out: dict[tuple[str, str], dict] = {}
    for r in _load_curated_rows():
        if not isinstance(r, dict):
            continue
        rat = _norm(r.get("RAT"))
        kpi = _norm(r.get("KPI"))
        if not rat or not kpi:
            continue

        direction = str(r.get("direction") or "").strip()
        policy_raw = str(r.get("policy") or "").strip().lower()
        # Only the two known policies are accepted; anything else -> None.
        policy = policy_raw if policy_raw in {"enforce", "notify"} else None
        sla_raw = r.get("sla", None)
        try:
            sla = float(sla_raw) if sla_raw is not None else None
        except Exception:  # noqa: BLE001
            sla = None

        out[(rat, kpi)] = {
            "direction": direction or None,
            "sla": sla,
            "policy": policy,
        }
    return out
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _curated_rule(kpi: str, rat: str | None = None) -> dict | None:
    """Look up the curated rule for a (RAT, KPI) pair; None when absent."""
    if not kpi or not rat:
        return None
    return _curated_rules_map().get((_norm(rat), _norm(kpi)))
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def infer_kpi_direction(kpi: str, rat: str | None = None) -> str:
    """Return 'lower_is_better' or 'higher_is_better' for a KPI name.

    A curated rule wins when present; otherwise the name is matched against
    failure-style keywords (drops, blocking, congestion, ...).
    """
    rule = _curated_rule(kpi, rat)
    if rule and rule.get("direction"):
        return str(rule["direction"])

    name = _norm(kpi)
    # TWAMP loss counters are raw counts, always lower-is-better.
    if _norm(rat) == "twamp" and ("lost" in name or "loss" in name):
        return "lower_is_better"
    failure_markers = (
        "drop",
        "dcr",
        "blocking",
        "block",
        "congestion",
        "loss",
        "lost",
        "discard",
        "rtwp",
        "prb usage",
        "usage",
        "fail",
    )
    if any(marker in name for marker in failure_markers):
        return "lower_is_better"
    return "higher_is_better"
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def infer_kpi_sla(kpi: str, direction: str, rat: str | None = None) -> float | None:
    """Default SLA threshold for a KPI; curated value wins, else heuristics.

    Returns None when no sensible default exists.
    """
    rule = _curated_rule(kpi, rat)
    if rule and rule.get("sla") is not None:
        try:
            return float(rule["sla"])
        except Exception:  # noqa: BLE001
            pass

    name = _norm(kpi)
    # TWAMP loss counters use a packet-count threshold, not a percentage.
    if _norm(rat) == "twamp" and ("lost" in name or "loss" in name):
        return 1000.0
    success_markers = ("availability", "cssr", "success", " sr")
    if direction == "higher_is_better" and any(m in name for m in success_markers):
        return 98.0
    failure_markers = ("drop", "dcr", "blocking", "congestion", "loss", "discard")
    if direction == "lower_is_better" and any(m in name for m in failure_markers):
        return 2.0
    return None
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def infer_kpi_policy(kpi: str, rat: str | None = None) -> str:
    """Return 'enforce' unless a curated rule or the KPI name says 'notify'."""
    rule = _curated_rule(kpi, rat)
    if rule and rule.get("policy"):
        return str(rule["policy"])
    return "notify" if "distance" in _norm(kpi) else "enforce"
|
process_kpi/lte_kpi_requirements.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LTE CAPACITY REPORT
|
| 2 |
+
|
| 3 |
+
Based on the GSM and WCDMA examples, let's build the LTE capacity report.
|
| 4 |
+
|
| 5 |
+
## Required Input
|
| 6 |
+
|
| 7 |
+
- File : LTE BH report with columns :
|
| 8 |
+
- PERIOD_START_TIME
|
| 9 |
+
- MRBTS/SBTS name
|
| 10 |
+
- LNBTS name
|
| 11 |
+
- LNCEL name
|
| 12 |
+
- DN
|
| 13 |
+
- Cell Avail excl BLU
|
| 14 |
+
- E-UTRAN Avg PRB usage per TTI DL
|
| 15 |
+
- Number of last day for the analysis
|
| 16 |
+
- Number of days for threshold
|
| 17 |
+
- Availability threshold
|
| 18 |
+
- PRB usage per TTI DL threshold
|
| 19 |
+
- Max difference between PRB usage over cells of the same BTS
|
| 20 |
+
|
| 21 |
+
### TASK
|
| 22 |
+
|
| 23 |
+
- Pivot KPI in BH report per KPI (Cell Avail excl BLU, E-UTRAN Avg PRB usage per TTI DL)
|
| 24 |
+
- Calculate Average and Max of PRB usage per TTI DL
|
| 25 |
+
- Calculate Average and Max of Cell Avail excl BLU
|
| 26 |
+
- Count number of Days with Cell Avail excl BLU below Availability threshold
|
| 27 |
+
- Count number of Days with PRB usage per TTI DL exceeded PRB usage per TTI DL threshold
|
| 28 |
+
- Create separate DF per sector and band based on LNCEL name
|
| 29 |
+
- _1_L800: column_name = Sector_1_L800
|
| 30 |
+
- _2_L800: column_name = Sector_2_L800
|
| 31 |
+
- _3_L800: column_name = Sector_3_L800
|
| 32 |
+
- _1_L1800: column_name = Sector_1_L1800
|
| 33 |
+
- _2_L1800: column_name = Sector_2_L1800
|
| 34 |
+
- _3_L1800: column_name = Sector_3_L1800
|
| 35 |
+
- _1_L2300: column_name = Sector_1_L2300
|
| 36 |
+
- _2_L2300: column_name = Sector_2_L2300
|
| 37 |
+
- _3_L2300: column_name = Sector_3_L2300
|
| 38 |
+
- _1_L2600: column_name = Sector_1_L2600
|
| 39 |
+
- _2_L2600: column_name = Sector_2_L2600
|
| 40 |
+
- _3_L2600: column_name = Sector_3_L2600
|
| 41 |
+
- _1S_L1800: column_name = Sector_1S_L1800
|
| 42 |
+
- _2S_L1800: column_name = Sector_2S_L1800
|
| 43 |
+
- _3S_L1800: column_name = Sector_3S_L1800
|
| 44 |
+
- Merge DFs per sector LNBTS name
|
| 45 |
+
- Concat dfs per Bands
|
| 46 |
+
|
process_kpi/process_gsm_capacity.py
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from queries.process_gsm import combined_gsm_database
|
| 5 |
+
from utils.check_sheet_exist import execute_checks_sheets_exist
|
| 6 |
+
from utils.convert_to_excel import convert_dfs, save_dataframe
|
| 7 |
+
from utils.kpi_analysis_utils import (
|
| 8 |
+
GsmAnalysis,
|
| 9 |
+
GsmCapacity,
|
| 10 |
+
analyze_sdcch_call_blocking,
|
| 11 |
+
analyze_tch_abis_fails,
|
| 12 |
+
analyze_tch_call_blocking,
|
| 13 |
+
cell_availability_analysis,
|
| 14 |
+
combine_comments,
|
| 15 |
+
create_daily_date,
|
| 16 |
+
create_dfs_per_kpi,
|
| 17 |
+
create_hourly_date,
|
| 18 |
+
kpi_naming_cleaning,
|
| 19 |
+
)
|
| 20 |
+
from utils.utils_functions import calculate_distances
|
| 21 |
+
|
| 22 |
+
# Column selections used by the GSM capacity processing in this module.
# Order matters: it is the order columns appear in the produced tables
# (NOTE(review): presumably consumed by the Excel export helpers — confirm).

# Full per-cell analysis layout: identity, configuration, traffic, blocking,
# availability and the derived comment columns.
GSM_ANALYSIS_COLUMNS = [
    "ID_BTS",
    "site_name",
    "name",
    "BSC",
    "BCF",
    "BTS",
    "code",
    "Region",
    "adminState",
    "frequencyBandInUse",
    "cellId",
    "band",
    "site_config_band",
    "trxRfPower",
    "BCCH",
    "Longitude",
    "Latitude",
    "TRX_TCH",
    "MAL_TCH",
    "amrSegLoadDepTchRateLower",
    "amrSegLoadDepTchRateUpper",
    "btsSpLoadDepTchRateLower",
    "btsSpLoadDepTchRateUpper",
    "amrWbFrCodecModeSet",
    "dedicatedGPRScapacity",
    "defaultGPRScapacity",
    "number_trx_per_cell",
    "number_trx_per_bcf",
    "number_tch_per_cell",
    "number_sd_per_cell",
    "number_bcch_per_cell",
    "number_ccch_per_cell",
    "number_cbc_per_cell",
    "number_total_channels_per_cell",
    "number_signals_per_cell",
    "hf_rate_coef",
    "GPRS",
    "TCH Actual HR%",
    "Offered Traffic BH",
    "Max_Traffic BH",
    "Avg_Traffic BH",
    "TCH UTILIZATION (@Max Traffic)",
    "Tch utilization comments",
    "ErlabngB_value",
    "Target FR CHs",
    "Target HR CHs",
    "Target TCHs",
    "Target TRXs",
    "Number of required TRXs",
    "max_tch_call_blocking_bh",
    "avg_tch_call_blocking_bh",
    "number_of_days_with_tch_blocking_exceeded_bh",
    "tch_call_blocking_bh_comment",
    "max_sdcch_real_blocking_bh",
    "avg_sdcch_real_blocking_bh",
    "number_of_days_with_sdcch_blocking_exceeded_bh",
    "sdcch_real_blocking_bh_comment",
    "Average_cell_availability_bh",
    "number_of_days_exceeding_availability_threshold_bh",
    "availability_comment_bh",
    "max_tch_abis_fail_bh",
    "avg_tch_abis_fail_bh",
    "number_of_days_with_tch_abis_fail_exceeded_bh",
    "tch_abis_fail_bh_comment",
    "Average_cell_availability_daily",
    "number_of_days_exceeding_availability_threshold_daily",
    "availability_comment_daily",
    "max_tch_abis_fail_daily",
    "avg_tch_abis_fail_daily",
    "number_of_days_with_tch_abis_fail_exceeded_daily",
    "tch_abis_fail_daily_comment",
    "BH Congestion status",
    "operational_comment",
    "Final comment",
    "Final comment summary",
]

# Minimal layout pairing congestion status with location (neighbour review).
OPERATIONAL_NEIGHBOURS_COLUMNS = [
    "ID_BTS",
    "name",
    "operational_comment",
    "BH Congestion status",
    "Longitude",
    "Latitude",
]

# Static configuration / topology columns of the GSM cell database.
GSM_COLUMNS = [
    "ID_BTS",
    "site_name",
    "name",
    "BSC",
    "BCF",
    "BTS",
    "code",
    "Region",
    "adminState",
    "frequencyBandInUse",
    "amrSegLoadDepTchRateLower",
    "amrSegLoadDepTchRateUpper",
    "btsSpLoadDepTchRateLower",
    "btsSpLoadDepTchRateUpper",
    "amrWbFrCodecModeSet",
    "dedicatedGPRScapacity",
    "defaultGPRScapacity",
    "cellId",
    "band",
    "site_config_band",
    "trxRfPower",
    "BCCH",
    "number_trx_per_cell",
    "number_trx_per_bcf",
    "TRX_TCH",
    "MAL_TCH",
    "Longitude",
    "Latitude",
]

# Per-cell channel counts derived from the TRX configuration.
TRX_COLUMNS = [
    "ID_BTS",
    "number_tch_per_cell",
    "number_sd_per_cell",
    "number_bcch_per_cell",
    "number_ccch_per_cell",
    "number_cbc_per_cell",
    "number_total_channels_per_cell",
    "number_signals_per_cell",
]

# Expected columns of the raw BH KPI input.
KPI_COLUMNS = [
    "date",
    "BTS_name",
    "TCH_availability_ratio",
    "2G_Carried_Traffic",
    "TCH_call_blocking",
    "TCH_ABIS_FAIL_CALL_c001084",
    "SDCCH_real_blocking",
]
# Busy-hour analysis outputs merged into the capacity sheet.
BH_COLUMNS_FOR_CAPACITY = [
    "Max_Traffic BH",
    "Avg_Traffic BH",
    "max_tch_call_blocking_bh",
    "avg_tch_call_blocking_bh",
    "number_of_days_with_tch_blocking_exceeded_bh",
    "tch_call_blocking_bh_comment",
    "max_sdcch_real_blocking_bh",
    "avg_sdcch_real_blocking_bh",
    "number_of_days_with_sdcch_blocking_exceeded_bh",
    "sdcch_real_blocking_bh_comment",
    "Average_cell_availability_bh",
    "number_of_days_exceeding_availability_threshold_bh",
    "availability_comment_bh",
    "max_tch_abis_fail_bh",
    "avg_tch_abis_fail_bh",
    "number_of_days_with_tch_abis_fail_exceeded_bh",
    "tch_abis_fail_bh_comment",
]

# Daily analysis outputs merged into the capacity sheet.
DAILY_COLUMNS_FOR_CAPACITY = [
    "Average_cell_availability_daily",
    "number_of_days_exceeding_availability_threshold_daily",
    "availability_comment_daily",
    "max_tch_abis_fail_daily",
    "avg_tch_abis_fail_daily",
    "number_of_days_with_tch_abis_fail_exceeded_daily",
    "tch_abis_fail_daily_comment",
]
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def bh_traffic_analysis(
    df: pd.DataFrame,
    number_of_kpi_days: int,
) -> pd.DataFrame:
    """Append busy-hour traffic summary columns to a pivoted traffic frame.

    The average (rounded to 2 decimals) and the maximum of the last
    ``number_of_kpi_days`` columns are added as "Avg_Traffic BH" and
    "Max_Traffic BH". The input frame is not modified.

    Args:
        df: Pivoted carried-traffic DataFrame, one column per day.
        number_of_kpi_days: How many trailing day-columns to summarize.

    Returns:
        A copy of ``df`` with the two summary columns appended.
    """
    summary = df.copy()
    recent_days = summary.iloc[:, -number_of_kpi_days:]

    summary["Avg_Traffic BH"] = recent_days.mean(axis=1).round(2)
    summary["Max_Traffic BH"] = recent_days.max(axis=1)
    return summary
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def bh_dfs_per_kpi(
    df: pd.DataFrame,
    number_of_kpi_days: int = 7,
    tch_blocking_threshold: float = 0.50,
    sdcch_blocking_threshold: float = 0.50,
    number_of_threshold_days: int = 3,
    tch_abis_fails_threshold: int = 10,
    availability_threshold: int = 95,
) -> pd.DataFrame:
    """Pivot the busy-hour KPI report per KPI and run the BH analyses.

    Fixes over the previous revision: removed a dead ``pivoted_kpi_dfs = {}``
    assignment that was immediately overwritten, and corrected the ``int``
    annotations on the two blocking thresholds whose defaults are floats.

    Args:
        df: Long-format KPI data with "date", "BTS_name" and KPI columns.
        number_of_kpi_days: Number of most recent days to analyze.
        tch_blocking_threshold: TCH blocking level above which a day is flagged.
        sdcch_blocking_threshold: SDCCH blocking level above which a day is flagged.
        number_of_threshold_days: Minimum flagged days before a comment is raised.
        tch_abis_fails_threshold: Daily TCH Abis failure count threshold.
        availability_threshold: Minimum acceptable cell availability (percent).

    Returns:
        Wide DataFrame (one row per BTS) concatenating all per-KPI analyses.
    """
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="BTS_name",
        kpi_columns_from=2,
    )

    tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
    sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
    carried_traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
    tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
    tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]

    # Per-KPI analyses, all in busy-hour mode.
    tch_call_blocking_df = analyze_tch_call_blocking(
        df=tch_call_blocking_df,
        number_of_kpi_days=number_of_kpi_days,
        number_of_threshold_days=number_of_threshold_days,
        tch_blocking_threshold=tch_blocking_threshold,
        analysis_type="BH",
    )

    sdcch_real_blocking_df = analyze_sdcch_call_blocking(
        df=sdcch_real_blocking_df,
        number_of_kpi_days=number_of_kpi_days,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        number_of_threshold_days=number_of_threshold_days,
        analysis_type="BH",
    )

    carried_traffic_df = bh_traffic_analysis(
        df=carried_traffic_df,
        number_of_kpi_days=number_of_kpi_days,
    )

    tch_abis_fails_df = analyze_tch_abis_fails(
        df=tch_abis_fails_df,
        number_of_kpi_days=number_of_kpi_days,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        number_of_threshold_days=number_of_threshold_days,
        analysis_type="BH",
    )
    tch_availability_ratio_df = cell_availability_analysis(
        df=tch_availability_ratio_df,
        days=number_of_kpi_days,
        availability_threshold=availability_threshold,
        analysis_type="BH",
    )

    # One row per BTS: column-wise concat of every per-KPI result frame.
    bh_kpi_df = pd.concat(
        [
            carried_traffic_df,
            tch_call_blocking_df,
            sdcch_real_blocking_df,
            tch_availability_ratio_df,
            tch_abis_fails_df,
        ],
        axis=1,
    )
    return bh_kpi_df
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def analyse_bh_data(
    bh_report_path: str,
    number_of_kpi_days: int,
    tch_blocking_threshold: float,
    sdcch_blocking_threshold: float,
    number_of_threshold_days: int,
    tch_abis_fails_threshold: int,
    availability_threshold: int,
) -> list:
    """Load the busy-hour KPI CSV and produce the BH analysis frames.

    Fixes over the previous revision: the return annotation claimed a single
    ``pd.DataFrame`` while a two-element list is returned, and the two
    blocking thresholds were annotated ``int`` although callers pass floats.

    Args:
        bh_report_path: Path to the semicolon-delimited busy-hour KPI export.
        number_of_kpi_days: Number of most recent days to analyze.
        tch_blocking_threshold: TCH blocking flag threshold.
        sdcch_blocking_threshold: SDCCH blocking flag threshold.
        number_of_threshold_days: Minimum flagged days before a comment is raised.
        tch_abis_fails_threshold: Daily TCH Abis failure count threshold.
        availability_threshold: Minimum acceptable cell availability (percent).

    Returns:
        ``[bh_df_for_capacity, df]`` — the flattened per-BTS capacity subset
        (keyed by "name") and the full busy-hour analysis DataFrame.
    """
    df = pd.read_csv(bh_report_path, delimiter=";")
    df = kpi_naming_cleaning(df)
    df = create_hourly_date(df)
    df = df[KPI_COLUMNS]
    df = bh_dfs_per_kpi(
        df=df,
        number_of_kpi_days=number_of_kpi_days,
        tch_blocking_threshold=tch_blocking_threshold,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        number_of_threshold_days=number_of_threshold_days,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        availability_threshold=availability_threshold,
    )

    bh_df_for_capacity = df[BH_COLUMNS_FOR_CAPACITY].copy()
    bh_df_for_capacity = bh_df_for_capacity.reset_index()

    # Pivoted frames carry a (KPI, date) MultiIndex on the columns; flatten
    # it to plain underscore-joined names so the frame can be merged later.
    if isinstance(bh_df_for_capacity.columns, pd.MultiIndex):
        bh_df_for_capacity.columns = [
            "_".join([str(el) for el in col if el])
            for col in bh_df_for_capacity.columns.values
        ]

    # Align with the key used by the dump database in analyze_gsm_data.
    bh_df_for_capacity = bh_df_for_capacity.rename(columns={"BTS_name": "name"})

    return [bh_df_for_capacity, df]
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
def daily_dfs_per_kpi(
    df: pd.DataFrame,
    number_of_kpi_days: int = 7,
    availability_threshold: int = 95,
    number_of_threshold_days: int = 3,
    tch_abis_fails_threshold: int = 10,
    sdcch_blocking_threshold: float = 0.5,
    tch_blocking_threshold: float = 0.5,
) -> pd.DataFrame:
    """Pivot the daily KPI report per KPI and run the daily analyses.

    Fixes over the previous revision: removed a dead ``pivoted_kpi_dfs = {}``
    assignment that was immediately overwritten, and corrected the ``int``
    annotations on the two blocking thresholds whose defaults are floats.

    Args:
        df: Long-format KPI data with "date", "BTS_name" and KPI columns.
        number_of_kpi_days: Number of most recent days to analyze.
        availability_threshold: Minimum acceptable cell availability (percent).
        number_of_threshold_days: Minimum flagged days before a comment is raised.
        tch_abis_fails_threshold: Daily TCH Abis failure count threshold.
        sdcch_blocking_threshold: SDCCH blocking flag threshold.
        tch_blocking_threshold: TCH blocking flag threshold.

    Returns:
        Wide DataFrame (one row per BTS) concatenating the per-KPI analyses
        plus the combined "sdcch_comments" and "tch_comments" columns.
    """
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="BTS_name",
        kpi_columns_from=2,
    )

    tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
    sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
    carried_traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
    tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
    tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]

    tch_availability_ratio_df = cell_availability_analysis(
        df=tch_availability_ratio_df,
        days=number_of_kpi_days,
        availability_threshold=availability_threshold,
    )
    sdcch_real_blocking_df = analyze_sdcch_call_blocking(
        df=sdcch_real_blocking_df,
        number_of_kpi_days=number_of_kpi_days,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        number_of_threshold_days=number_of_threshold_days,
        analysis_type="Daily",
    )
    tch_call_blocking_df = analyze_tch_call_blocking(
        df=tch_call_blocking_df,
        number_of_kpi_days=number_of_kpi_days,
        number_of_threshold_days=number_of_threshold_days,
        tch_blocking_threshold=tch_blocking_threshold,
        analysis_type="Daily",
    )
    tch_abis_fails_df = analyze_tch_abis_fails(
        df=tch_abis_fails_df,
        number_of_kpi_days=number_of_kpi_days,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        number_of_threshold_days=number_of_threshold_days,
        analysis_type="Daily",
    )

    # Carried traffic is concatenated as the raw pivot: the daily report gets
    # no traffic summary (that is only computed in the busy-hour path).
    daily_kpi_df = pd.concat(
        [
            tch_availability_ratio_df,
            carried_traffic_df,
            tch_call_blocking_df,
            sdcch_real_blocking_df,
            tch_abis_fails_df,
        ],
        axis=1,
    )

    daily_kpi_df = combine_comments(
        daily_kpi_df,
        "availability_comment_daily",
        "tch_abis_fail_daily_comment",
        "sdcch_real_blocking_daily_comment",
        new_column="sdcch_comments",
    )

    daily_kpi_df = combine_comments(
        daily_kpi_df,
        "availability_comment_daily",
        "tch_abis_fail_daily_comment",
        "tch_call_blocking_daily_comment",
        new_column="tch_comments",
    )
    return daily_kpi_df
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def analyse_daily_data(
    daily_report_path: str,
    number_of_kpi_days: int,
    tch_abis_fails_threshold: int,
    availability_threshold: int,
    number_of_threshold_days: int,
    sdcch_blocking_threshold: float,
    tch_blocking_threshold: float,
) -> tuple:
    """Load the daily KPI CSV and produce the daily analysis frames.

    Fixes over the previous revision: the return annotation claimed a single
    ``pd.DataFrame`` while a two-element tuple is returned, and the two
    blocking thresholds were annotated ``int`` although callers pass floats.

    Args:
        daily_report_path: Path to the semicolon-delimited daily KPI export.
        number_of_kpi_days: Number of most recent days to analyze.
        tch_abis_fails_threshold: Daily TCH Abis failure count threshold.
        availability_threshold: Minimum acceptable cell availability (percent).
        number_of_threshold_days: Minimum flagged days before a comment is raised.
        sdcch_blocking_threshold: SDCCH blocking flag threshold.
        tch_blocking_threshold: TCH blocking flag threshold.

    Returns:
        ``(daily_df_for_capacity, df)`` — the flattened per-BTS capacity
        subset (keyed by "name") and the full daily analysis DataFrame.
    """
    df = pd.read_csv(daily_report_path, delimiter=";")
    df = kpi_naming_cleaning(df)
    df = create_daily_date(df)
    df = df[KPI_COLUMNS]
    df = daily_dfs_per_kpi(
        df=df,
        number_of_kpi_days=number_of_kpi_days,
        availability_threshold=availability_threshold,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        number_of_threshold_days=number_of_threshold_days,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        tch_blocking_threshold=tch_blocking_threshold,
    )
    daily_df_for_capacity = df[DAILY_COLUMNS_FOR_CAPACITY].copy()
    daily_df_for_capacity = daily_df_for_capacity.reset_index()

    # Pivoted frames carry a (KPI, date) MultiIndex on the columns; flatten
    # it to plain underscore-joined names so the frame can be merged later.
    if isinstance(daily_df_for_capacity.columns, pd.MultiIndex):
        daily_df_for_capacity.columns = [
            "_".join([str(el) for el in col if el])
            for col in daily_df_for_capacity.columns.values
        ]
    # Align with the key used by the dump database in analyze_gsm_data.
    daily_df_for_capacity = daily_df_for_capacity.rename(columns={"BTS_name": "name"})

    return daily_df_for_capacity, df
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def get_gsm_databases(dump_path: str) -> pd.DataFrame:
    """Build the per-cell GSM configuration table from the dump file.

    Joins the BTS-level and TRX-level tables returned by
    ``combined_gsm_database`` on "ID_BTS" and derives the capacity inputs:
    half-rate coefficient, GPRS channel reservation, effective HR channel
    count and the Erlang-B offered traffic.

    Args:
        dump_path: Path to the network dump consumed by ``combined_gsm_database``.

    Returns:
        DataFrame with one row per BTS including "Offered Traffic BH".
    """

    dfs = combined_gsm_database(dump_path)
    bts_df: pd.DataFrame = dfs[0]
    trx_df: pd.DataFrame = dfs[2]

    # Restrict both tables to the columns needed by the capacity analysis.
    bts_df = bts_df[GSM_COLUMNS]
    trx_df = trx_df[TRX_COLUMNS]

    # TRX table may repeat a BTS; keep the first occurrence per "ID_BTS".
    trx_df = trx_df.drop_duplicates(subset=["ID_BTS"])

    gsm_df = pd.merge(bts_df, trx_df, on="ID_BTS", how="left")

    # Half-rate coefficient mapped from the AMR load-dependent lower rate
    # parameter via the project lookup table.
    gsm_df["hf_rate_coef"] = gsm_df["amrSegLoadDepTchRateLower"].map(
        GsmAnalysis.hf_rate_coef
    )
    # "GPRS" = (dedicatedGPRScapacity * number_tch_per_cell) / 100
    # i.e. the TCH share reserved for GPRS (capacity is a percentage).
    gsm_df["GPRS"] = (
        gsm_df["dedicatedGPRScapacity"] * gsm_df["number_tch_per_cell"]
    ) / 100

    # Effective channel count when half-rate is applied:
    # "TCH Actual HR%" = number of TCH * half-rate coefficient.
    gsm_df["TCH Actual HR%"] = gsm_df["number_tch_per_cell"] * gsm_df["hf_rate_coef"]

    # Drop rows where the coefficient could not be mapped (NaN result).
    gsm_df = gsm_df.dropna(subset=["TCH Actual HR%"])

    # Offered traffic from the Erlang-B lookup table, keyed by the integer
    # channel count; unknown counts fall back to 0.
    gsm_df["Offered Traffic BH"] = gsm_df["TCH Actual HR%"].apply(
        lambda x: GsmAnalysis.erlangB_table.get(int(x), 0)
    )

    return gsm_df
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
def get_operational_neighbours(distance: int) -> pd.DataFrame:
    """Pair congested cells with nearby cells that have operational issues.

    Reads ``GsmCapacity.operational_neighbours_df`` (populated by
    ``analyze_gsm_data``) twice: once filtered to cells with an operational
    problem, once filtered to cells with a BH congestion status. Each
    congested cell is paired with every problem cell within ``distance``
    of it, as computed by ``calculate_distances``.

    Args:
        distance: Maximum separation between source and neighbour
            (compared against "Distance_km" — presumably kilometers;
            confirm against ``calculate_distances``).

    Returns:
        DataFrame of Source_*/Neighbour_* pairs with coordinates and comments.
    """

    operational_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
    operational_df = operational_df[
        ["ID_BTS", "name", "operational_comment", "Longitude", "Latitude"]
    ]
    # Keep only cells that actually have an operational problem.
    operational_df = operational_df[
        operational_df["operational_comment"] != "Operational is OK"
    ]
    # Coordinates are required for the distance computation.
    operational_df = operational_df[
        operational_df[["Latitude", "Longitude"]].notna().all(axis=1)
    ]

    # Prefix dataset-2 columns so the calculate_distances output is unambiguous.
    operational_df = operational_df.add_prefix("Dataset2_")

    congested_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
    congested_df = congested_df[
        ["ID_BTS", "name", "BH Congestion status", "Longitude", "Latitude"]
    ]

    # Keep only cells with a non-empty, non-NaN congestion status.
    congested_df = congested_df[
        congested_df["BH Congestion status"].notna()
        & congested_df["BH Congestion status"].astype(str).str.len().astype(bool)
    ]
    # Drop the artifact produced when both combined comments were NaN.
    congested_df = congested_df[congested_df["BH Congestion status"] != "nan, nan"]

    # Coordinates are required for the distance computation.
    congested_df = congested_df[
        congested_df[["Latitude", "Longitude"]].notna().all(axis=1)
    ]

    # Prefix dataset-1 columns to match the calculate_distances contract.
    congested_df = congested_df.add_prefix("Dataset1_")

    distances_dfs = calculate_distances(
        congested_df,
        operational_df,
        "Dataset1_ID_BTS",
        "Dataset1_Latitude",
        "Dataset1_Longitude",
        "Dataset2_ID_BTS",
        "Dataset2_Latitude",
        "Dataset2_Longitude",
    )
    distances_df = distances_dfs[0]
    df1 = distances_df[distances_df["Distance_km"] <= distance]

    # Rename the paired columns to Source_*/Neighbour_* for the final report.
    # NOTE(review): the "_Dataset2" suffixes come from calculate_distances'
    # merge of the second dataset — confirm against that helper.
    df1 = df1.rename(
        columns={
            "Dataset1_ID_BTS": "Source_ID_BTS",
            "Dataset1_name": "Source_name",
            "Dataset1_BH Congestion status": "Source_BH Congestion status",
            "Dataset1_Longitude": "Source_Longitude",
            "Dataset1_Latitude": "Source_Latitude",
            "Dataset2_ID_BTS_Dataset2": "Neighbour_ID_BTS",
            "Dataset2_name_Dataset2": "Neighbour_name",
            "Dataset2_operational_comment_Dataset2": "Neighbour_operational_comment",
            "Dataset2_Longitude_Dataset2": "Neighbour_Longitude",
            "Dataset2_Latitude_Dataset2": "Neighbour_Latitude",
        }
    )

    # A cell can appear in both datasets; drop self-pairs.
    df1 = df1[df1["Source_name"] != df1["Neighbour_name"]]

    df1 = df1.reset_index(drop=True)
    return df1
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
def analyze_gsm_data(
    dump_path: str,
    daily_report_path: str,
    bh_report_path: str,
    number_of_kpi_days: int,
    number_of_threshold_days: int,
    availability_threshold: int,
    tch_abis_fails_threshold: int,
    sdcch_blocking_threshold: float,
    tch_blocking_threshold: float,
    max_traffic_threshold: int,
    operational_neighbours_distance: int,
):
    """Run the full GSM capacity analysis pipeline.

    Combines the dump database, the busy-hour KPI analysis and the daily
    KPI analysis into one per-BTS frame, derives TCH utilization, required
    TRX counts, congestion/operational comments, and the congested-cell /
    problem-neighbour distance report.

    Args:
        dump_path: Network dump for ``get_gsm_databases``.
        daily_report_path: Daily KPI CSV for ``analyse_daily_data``.
        bh_report_path: Busy-hour KPI CSV for ``analyse_bh_data``.
        number_of_kpi_days: Number of most recent days to analyze.
        number_of_threshold_days: Minimum flagged days before a comment is raised.
        availability_threshold: Minimum acceptable cell availability (percent).
        tch_abis_fails_threshold: Daily TCH Abis failure count threshold.
        sdcch_blocking_threshold: SDCCH blocking flag threshold.
        tch_blocking_threshold: TCH blocking flag threshold.
        max_traffic_threshold: TCH utilization (%) above which a cell is flagged.
        operational_neighbours_distance: Max distance for the neighbour report.

    Returns:
        ``[gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df, distance_df]``.
    """
    # Reset the module-level frame consumed by get_operational_neighbours.
    GsmCapacity.operational_neighbours_df = None

    daily_kpi_dfs: pd.DataFrame = analyse_daily_data(
        daily_report_path=daily_report_path,
        number_of_kpi_days=number_of_kpi_days,
        availability_threshold=availability_threshold,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        number_of_threshold_days=number_of_threshold_days,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        tch_blocking_threshold=tch_blocking_threshold,
    )

    gsm_database_df: pd.DataFrame = get_gsm_databases(dump_path)

    bh_kpi_dfs = analyse_bh_data(
        bh_report_path=bh_report_path,
        number_of_kpi_days=number_of_kpi_days,
        tch_blocking_threshold=tch_blocking_threshold,
        sdcch_blocking_threshold=sdcch_blocking_threshold,
        number_of_threshold_days=number_of_threshold_days,
        tch_abis_fails_threshold=tch_abis_fails_threshold,
        availability_threshold=availability_threshold,
    )

    # Each analysis returns [capacity_subset, full_frame].
    bh_kpi_df = bh_kpi_dfs[0]
    bh_kpi_full_df = bh_kpi_dfs[1]

    daily_kpi_df = daily_kpi_dfs[0]
    daily_kpi_full_df = daily_kpi_dfs[1]

    # Merge KPI subsets onto the dump database by BTS name.
    gsm_analysis_df = gsm_database_df.merge(bh_kpi_df, on="name", how="left")
    gsm_analysis_df = gsm_analysis_df.merge(daily_kpi_df, on="name", how="left")

    # TCH utilization at peak: (max carried traffic / offered traffic) * 100.
    gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] = (
        gsm_analysis_df["Max_Traffic BH"] / gsm_analysis_df["Offered Traffic BH"]
    ) * 100

    # Flag cells whose peak utilization exceeds the configured threshold.
    gsm_analysis_df["Tch utilization comments"] = np.where(
        gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] > max_traffic_threshold,
        "Tch utilization exceeded threshold",
        None,
    )
    # Congestion verdict: utilization + TCH blocking + SDCCH blocking comments.
    gsm_analysis_df = combine_comments(
        gsm_analysis_df,
        "Tch utilization comments",
        "tch_call_blocking_bh_comment",
        "sdcch_real_blocking_bh_comment",
        new_column="BH Congestion status",
    )

    # Erlang-B target load: max traffic / (1 - max TCH blocking / 200).
    # NOTE(review): "ErlabngB_value" is a typo kept as-is — it is a runtime
    # column name and may be referenced by GSM_ANALYSIS_COLUMNS/exports.
    gsm_analysis_df["ErlabngB_value"] = gsm_analysis_df["Max_Traffic BH"] / (
        1 - (gsm_analysis_df["max_tch_call_blocking_bh"] / 200)
    )

    # Target full-rate channel count from the Erlang-B lookup table.
    gsm_analysis_df["Target FR CHs"] = gsm_analysis_df["ErlabngB_value"].apply(
        lambda x: GsmAnalysis.erlangB_table.get(int(x) if pd.notnull(x) else 0, 0)
    )

    # Half-rate doubles the channel count.
    gsm_analysis_df["Target HR CHs"] = gsm_analysis_df["Target FR CHs"] * 2

    # Total target channels = HR channels + signalling + GPRS + SDCCH.
    gsm_analysis_df["Target TCHs"] = (
        gsm_analysis_df["Target HR CHs"]
        + gsm_analysis_df["number_signals_per_cell"]
        + gsm_analysis_df["GPRS"]
        + gsm_analysis_df["number_sd_per_cell"]
    )
    # 8 timeslots per TRX, rounded up.
    gsm_analysis_df["Target TRXs"] = np.ceil(
        gsm_analysis_df["Target TCHs"] / 8
    )

    # TRXs to add = target minus currently configured.
    gsm_analysis_df["Number of required TRXs"] = (
        gsm_analysis_df["Target TRXs"] - gsm_analysis_df["number_trx_per_cell"]
    )

    # Operational verdict (conditions evaluated in order):
    #   1. availability comment is "Down Site"          -> "Down Site"
    #   2. availability not OK AND Abis fail flagged    -> "Availability and TX issues"
    #   3. availability not OK AND no Abis fail comment -> "Availability issues"
    #   4. availability OK AND Abis fail flagged        -> "TX issues"
    #   else                                            -> "Operational is OK"
    gsm_analysis_df["operational_comment"] = np.select(
        [
            gsm_analysis_df["availability_comment_daily"] == "Down Site",  # 1
            (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
            & (
                gsm_analysis_df["tch_abis_fail_daily_comment"]
                == "tch abis fail exceeded threshold"
            ),  # 2
            (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
            & pd.isna(gsm_analysis_df["tch_abis_fail_daily_comment"]),  # 3
            (gsm_analysis_df["availability_comment_daily"] == "Availability OK")
            & (
                gsm_analysis_df["tch_abis_fail_daily_comment"]
                == "tch abis fail exceeded threshold"
            ),  # 4
        ],
        [
            "Down Site",  # 1
            "Availability and TX issues",  # 2
            "Availability issues",  # 3
            "TX issues",  # 4
        ],
        default="Operational is OK",
    )

    # Final verdict: congestion status + operational comment.
    gsm_analysis_df = combine_comments(
        gsm_analysis_df,
        "BH Congestion status",
        "operational_comment",
        new_column="Final comment",
    )
    # Condense the combined comment via the project mapping table.
    gsm_analysis_df["Final comment summary"] = gsm_analysis_df["Final comment"].map(
        GsmCapacity.final_comment_mapping
    )
    gsm_analysis_df = gsm_analysis_df[GSM_ANALYSIS_COLUMNS]

    # Publish the frame consumed by get_operational_neighbours, then build
    # the neighbour distance report from it.
    GsmCapacity.operational_neighbours_df = gsm_analysis_df[
        OPERATIONAL_NEIGHBOURS_COLUMNS
    ]
    distance_df = get_operational_neighbours(operational_neighbours_distance)

    return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df, distance_df]
process_kpi/process_lcg_capacity.py
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging

import numpy as np
import pandas as pd

from utils.kpi_analysis_utils import (
    analyze_lcg_utilization,
    combine_comments,
    create_daily_date,
    create_dfs_per_kpi,
    kpi_naming_cleaning,
)
from utils.utils_vars import get_physical_db
|
| 12 |
+
|
| 13 |
+
# Final verdict per combined "lcg_comment, number_of_lcg" string produced by
# combine_comments in lcg_kpi_analysis; unmapped combinations pass through
# unchanged. NOTE(review): "upgrage" is a typo kept as-is — downstream
# consumers may match the exact string.
lcg_comments_mapping = {
    "2": "No Congestion",
    "1": "No Congestion",
    "lcg1 exceeded threshold, lcg2 exceeded threshold, 2": "Need BB SU upgrage",
    "lcg1 exceeded threshold, 2": "Need LCG balancing",
    "lcg1 exceeded threshold, 1": "Need BB SU upgrage",
    "lcg2 exceeded threshold, 2": "Need LCG balancing",
}


# Columns required from the raw LCG utilization export.
KPI_COLUMNS = [
    "date",
    "WBTS_name",
    "lcg_id",
    "BB_SU_LCG_MAX_R",
]

# Column order of the combined per-WBTS LCG analysis frame.
LCG_ANALYSIS_COLUMNS = [
    "WBTS_name",
    "lcg1_utilisation",
    "avg_lcg1",
    "max_lcg1",
    "number_of_days_with_lcg1_exceeded",
    "lcg1_comment",
    "lcg2_utilisation",
    "avg_lcg2",
    "max_lcg2",
    "number_of_days_with_lcg2_exceeded",
    "lcg2_comment",
    "difference_between_lcgs",
    "difference_between_lcgs_comment",
    "lcg_comment",
    "number_of_lcg",
    "final_comments",
]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def lcg_kpi_analysis(
    df,
    num_last_days,
    num_threshold_days,
    lcg_utilization_threshold,
    difference_between_lcgs,
) -> pd.DataFrame:
    """
    Analyze LCG (baseband local cell group) capacity data.

    Pivots BB_SU_LCG_MAX_R per WBTS for LCG 1 and LCG 2 separately, flags
    utilization threshold breaches, computes the LCG imbalance, and maps
    the combined comments to a final verdict via ``lcg_comments_mapping``.

    Args:
        df: DataFrame containing LCG capacity data ("date", "WBTS_name",
            "lcg_id", "BB_SU_LCG_MAX_R").
        num_last_days: Number of days for analysis.
        num_threshold_days: Minimum days above threshold to flag for upgrade.
        lcg_utilization_threshold: Utilization threshold percentage for flagging.
        difference_between_lcgs: Imbalance threshold between the two LCGs.

    Returns:
        ``[lcg_analysis_df, kpi_df]`` — the flattened per-WBTS frame with
        code/Region/coordinates, and the combined per-KPI frame.
    """

    # Split per LCG so each group is pivoted and analyzed independently.
    lcg1_df = df[df["lcg_id"] == 1]
    lcg2_df = df[df["lcg_id"] == 2]

    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )

    pivoted_lcg1_df = create_dfs_per_kpi(
        df=lcg1_df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )
    pivoted_lcg2_df = create_dfs_per_kpi(
        df=lcg2_df,
        pivot_date_column="date",
        pivot_name_column="WBTS_name",
        kpi_columns_from=2,
    )

    # Unfiltered pivot keeps every site that has LCG 1 and/or LCG 2.
    BB_SU_LCG_MAX_R_df = pivoted_kpi_dfs["BB_SU_LCG_MAX_R"]

    pivoted_lcg1_df = pivoted_lcg1_df["BB_SU_LCG_MAX_R"]
    pivoted_lcg2_df = pivoted_lcg2_df["BB_SU_LCG_MAX_R"]

    # Rename the top column level so the two LCG frames can coexist.
    pivoted_lcg1_df = pivoted_lcg1_df.rename(
        columns={"BB_SU_LCG_MAX_R": "lcg1_utilisation"}
    )
    pivoted_lcg2_df = pivoted_lcg2_df.rename(
        columns={"BB_SU_LCG_MAX_R": "lcg2_utilisation"}
    )

    # Flag utilization breaches over the last num_last_days per LCG.
    pivoted_lcg1_df = analyze_lcg_utilization(
        df=pivoted_lcg1_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=lcg_utilization_threshold,
        kpi_column_name="lcg1",
    )
    pivoted_lcg2_df = analyze_lcg_utilization(
        df=pivoted_lcg2_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=lcg_utilization_threshold,
        kpi_column_name="lcg2",
    )
    kpi_df = pd.concat(
        [
            BB_SU_LCG_MAX_R_df,
            pivoted_lcg1_df,
            pivoted_lcg2_df,
        ],
        axis=1,
    )

    kpi_df = kpi_df.reset_index()

    # Number of available lcgs
    # kpi_df = pd.merge(kpi_df, available_lcgs_df, on="WBTS_name", how="left")

    # Absolute imbalance between the two LCG averages.
    kpi_df["difference_between_lcgs"] = kpi_df[["avg_lcg1", "avg_lcg2"]].apply(
        lambda row: max(row) - min(row), axis=1
    )

    # Flag sites whose imbalance exceeds the configured threshold.
    kpi_df["difference_between_lcgs_comment"] = np.where(
        kpi_df["difference_between_lcgs"] > difference_between_lcgs,
        "difference between lcgs exceeded threshold",
        None,
    )

    # Merge the two LCG breach comments into one string.
    kpi_df = combine_comments(
        kpi_df,
        "lcg1_comment",
        "lcg2_comment",
        # "difference_between_lcgs_comment",
        new_column="lcg_comment",
    )

    # combine_comments stringifies NaN comments; strip the "nan" artifacts.
    kpi_df["lcg_comment"] = kpi_df["lcg_comment"].replace("nan", None)

    kpi_df["lcg_comment"] = (
        kpi_df["lcg_comment"].str.replace(r"\bnan\b,?\s?", "", regex=True).str.strip()
    )

    # 2 if both LCG averages exist, 1 if exactly one does, else 0.
    kpi_df["number_of_lcg"] = np.where(
        kpi_df["avg_lcg1"].notna() & kpi_df["avg_lcg2"].notna(),
        2,
        np.where(kpi_df["avg_lcg1"].notna() | kpi_df["avg_lcg2"].notna(), 1, 0),
    )
    # Combine comment + LCG count, then map to the final verdict string;
    # unmapped combinations pass through unchanged.
    kpi_df = combine_comments(
        kpi_df,
        "lcg_comment",
        "number_of_lcg",
        new_column="final_comments",
    )
    kpi_df["final_comments"] = kpi_df["final_comments"].apply(
        lambda x: lcg_comments_mapping.get(x, x)
    )
    kpi_df = kpi_df[LCG_ANALYSIS_COLUMNS]

    lcg_analysis_df = kpi_df.copy()

    lcg_analysis_df = lcg_analysis_df[
        [
            "WBTS_name",
            "avg_lcg1",
            "max_lcg1",
            "number_of_days_with_lcg1_exceeded",
            "lcg1_comment",
            "avg_lcg2",
            "max_lcg2",
            "number_of_days_with_lcg2_exceeded",
            "lcg2_comment",
            "difference_between_lcgs",
            "final_comments",
        ]
    ]

    # Drop the per-date level left over from the pivot (columns MultiIndex).
    lcg_analysis_df = lcg_analysis_df.droplevel(level=1, axis=1)
    # Drop malformed site names (site code alone is at least 5 characters).
    lcg_analysis_df = lcg_analysis_df[lcg_analysis_df["WBTS_name"].str.len() >= 5]

    # Site naming convention assumed: "<code>_<region>_..." — TODO confirm.
    lcg_analysis_df["code"] = lcg_analysis_df["WBTS_name"].str.split("_").str[0]

    lcg_analysis_df["code"] = (
        pd.to_numeric(lcg_analysis_df["code"], errors="coerce").fillna(0).astype(int)
    )

    lcg_analysis_df["Region"] = (
        lcg_analysis_df["WBTS_name"].str.split("_").str[1:2].str.join("_")
    )
    lcg_analysis_df["Region"] = lcg_analysis_df["Region"].fillna("UNKNOWN")

    # Move code and Region to the front.
    lcg_analysis_df = lcg_analysis_df[
        ["code", "Region"]
        + [col for col in lcg_analysis_df if col != "code" and col != "Region"]
    ]

    # Attach site coordinates from the physical database.
    physical_db: pd.DataFrame = get_physical_db()

    # Sector codes are "<code>_<sector>"; reduce to the site code.
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    # One row per site is enough for coordinates.
    physical_db = physical_db.drop_duplicates(subset="code")

    physical_db = physical_db[["code", "Longitude", "Latitude"]]

    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )

    lcg_analysis_df = pd.merge(
        lcg_analysis_df,
        physical_db,
        on="code",
        how="left",
    )

    return [lcg_analysis_df, kpi_df]
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def load_and_process_lcg_data(
|
| 249 |
+
uploaded_file,
|
| 250 |
+
num_last_days,
|
| 251 |
+
num_threshold_days,
|
| 252 |
+
lcg_utilization_threshold,
|
| 253 |
+
difference_between_lcgs,
|
| 254 |
+
) -> pd.DataFrame:
|
| 255 |
+
"""Load and process data for LCG capacity analysis."""
|
| 256 |
+
try:
|
| 257 |
+
# Load data
|
| 258 |
+
df = pd.read_csv(uploaded_file, delimiter=";")
|
| 259 |
+
if df.empty:
|
| 260 |
+
raise ValueError("Uploaded file is empty")
|
| 261 |
+
|
| 262 |
+
df = kpi_naming_cleaning(df)
|
| 263 |
+
df = create_daily_date(df)
|
| 264 |
+
|
| 265 |
+
# Validate required columns
|
| 266 |
+
missing_cols = [col for col in KPI_COLUMNS if col not in df.columns]
|
| 267 |
+
if missing_cols:
|
| 268 |
+
raise ValueError(f"Missing required columns: {', '.join(missing_cols)}")
|
| 269 |
+
|
| 270 |
+
df = df[KPI_COLUMNS]
|
| 271 |
+
|
| 272 |
+
# Process the data
|
| 273 |
+
dfs = lcg_kpi_analysis(
|
| 274 |
+
df,
|
| 275 |
+
num_last_days,
|
| 276 |
+
num_threshold_days,
|
| 277 |
+
lcg_utilization_threshold,
|
| 278 |
+
difference_between_lcgs,
|
| 279 |
+
)
|
| 280 |
+
return dfs
|
| 281 |
+
|
| 282 |
+
except Exception as e:
|
| 283 |
+
# Log the error and re-raise with a user-friendly message
|
| 284 |
+
error_msg = f"Error processing LCG data: {str(e)}"
|
| 285 |
+
st.error(error_msg)
|
| 286 |
+
raise
|
process_kpi/process_lte_capacity.py
ADDED
|
@@ -0,0 +1,528 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from queries.process_lte import process_lte_data
|
| 5 |
+
from utils.convert_to_excel import save_dataframe
|
| 6 |
+
from utils.kpi_analysis_utils import (
|
| 7 |
+
LteCapacity,
|
| 8 |
+
analyze_prb_usage,
|
| 9 |
+
cell_availability_analysis,
|
| 10 |
+
create_dfs_per_kpi,
|
| 11 |
+
create_hourly_date,
|
| 12 |
+
kpi_naming_cleaning,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
LTE_ANALYSIS_COLUMNS = [
|
| 16 |
+
"code",
|
| 17 |
+
"code_sector",
|
| 18 |
+
"Region",
|
| 19 |
+
"site_config_band",
|
| 20 |
+
"Longitude",
|
| 21 |
+
"Latitude",
|
| 22 |
+
"LNCEL_name_l800",
|
| 23 |
+
"LNCEL_name_l1800",
|
| 24 |
+
"LNCEL_name_l2300",
|
| 25 |
+
"LNCEL_name_l2600",
|
| 26 |
+
"LNCEL_name_l1800s",
|
| 27 |
+
"avg_prb_usage_bh_l800",
|
| 28 |
+
"avg_prb_usage_bh_l1800",
|
| 29 |
+
"avg_prb_usage_bh_l2300",
|
| 30 |
+
"avg_prb_usage_bh_l2600",
|
| 31 |
+
"avg_prb_usage_bh_l1800s",
|
| 32 |
+
"avg_prb_usage_bh_l800_2nd",
|
| 33 |
+
"avg_prb_usage_bh_l1800_2nd",
|
| 34 |
+
"avg_prb_usage_bh_l2300_2nd",
|
| 35 |
+
"avg_prb_usage_bh_l2600_2nd",
|
| 36 |
+
"avg_prb_usage_bh_l1800s_2nd",
|
| 37 |
+
"avg_act_ues_l800",
|
| 38 |
+
"avg_act_ues_l1800",
|
| 39 |
+
"avg_act_ues_l2300",
|
| 40 |
+
"avg_act_ues_l2600",
|
| 41 |
+
"avg_act_ues_l1800s",
|
| 42 |
+
"avg_dl_thp_l800",
|
| 43 |
+
"avg_dl_thp_l1800",
|
| 44 |
+
"avg_dl_thp_l2300",
|
| 45 |
+
"avg_dl_thp_l2600",
|
| 46 |
+
"avg_dl_thp_l1800s",
|
| 47 |
+
"avg_ul_thp_l800",
|
| 48 |
+
"avg_ul_thp_l1800",
|
| 49 |
+
"avg_ul_thp_l2300",
|
| 50 |
+
"avg_ul_thp_l2600",
|
| 51 |
+
"avg_ul_thp_l1800s",
|
| 52 |
+
"num_congested_cells",
|
| 53 |
+
"num_cells",
|
| 54 |
+
"num_cell_with_kpi",
|
| 55 |
+
"num_down_or_no_kpi_cells",
|
| 56 |
+
"prb_diff_between_cells",
|
| 57 |
+
"load_balance_required",
|
| 58 |
+
"congestion_comment",
|
| 59 |
+
"final_comments",
|
| 60 |
+
]
|
| 61 |
+
|
| 62 |
+
LTE_DATABASE_COLUMNS = [
|
| 63 |
+
"code",
|
| 64 |
+
"Region",
|
| 65 |
+
"site_config_band",
|
| 66 |
+
"final_name",
|
| 67 |
+
"Longitude",
|
| 68 |
+
"Latitude",
|
| 69 |
+
]
|
| 70 |
+
|
| 71 |
+
KPI_COLUMNS = [
|
| 72 |
+
"date",
|
| 73 |
+
"LNCEL_name",
|
| 74 |
+
"Cell_Avail_excl_BLU",
|
| 75 |
+
"E_UTRAN_Avg_PRB_usage_per_TTI_DL",
|
| 76 |
+
"DL_PRB_Util_p_TTI_Lev_10",
|
| 77 |
+
"Avg_PDCP_cell_thp_UL",
|
| 78 |
+
"Avg_PDCP_cell_thp_DL",
|
| 79 |
+
"Avg_act_UEs_DL",
|
| 80 |
+
]
|
| 81 |
+
PRB_COLUMNS = [
|
| 82 |
+
"LNCEL_name",
|
| 83 |
+
"avg_prb_usage_bh",
|
| 84 |
+
"avg_prb_usage_bh_2nd",
|
| 85 |
+
"avg_act_ues",
|
| 86 |
+
"avg_dl_thp",
|
| 87 |
+
"avg_ul_thp",
|
| 88 |
+
]
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def lte_analysis_logic(
|
| 92 |
+
df: pd.DataFrame,
|
| 93 |
+
prb_usage_threshold: int,
|
| 94 |
+
prb_diff_between_cells_threshold: int,
|
| 95 |
+
) -> pd.DataFrame:
|
| 96 |
+
lte_analysis_logic_df = df.copy()
|
| 97 |
+
lte_analysis_logic_df["num_congested_cells"] = (
|
| 98 |
+
lte_analysis_logic_df[
|
| 99 |
+
[
|
| 100 |
+
"avg_prb_usage_bh_l800",
|
| 101 |
+
"avg_prb_usage_bh_l1800",
|
| 102 |
+
"avg_prb_usage_bh_l2300",
|
| 103 |
+
"avg_prb_usage_bh_l2600",
|
| 104 |
+
"avg_prb_usage_bh_l1800s",
|
| 105 |
+
]
|
| 106 |
+
]
|
| 107 |
+
>= prb_usage_threshold
|
| 108 |
+
).sum(axis=1)
|
| 109 |
+
|
| 110 |
+
# Add Number of cells LNCEL_name_l800 LNCEL_name_l1800 LNCEL_name_l2300 LNCEL_name_l2600 LNCEL_name_l1800s
|
| 111 |
+
lte_analysis_logic_df["num_cells"] = lte_analysis_logic_df[
|
| 112 |
+
[
|
| 113 |
+
"LNCEL_name_l800",
|
| 114 |
+
"LNCEL_name_l1800",
|
| 115 |
+
"LNCEL_name_l2300",
|
| 116 |
+
"LNCEL_name_l2600",
|
| 117 |
+
"LNCEL_name_l1800s",
|
| 118 |
+
]
|
| 119 |
+
].count(axis=1)
|
| 120 |
+
|
| 121 |
+
# Add Number of cell with KPI
|
| 122 |
+
lte_analysis_logic_df["num_cell_with_kpi"] = lte_analysis_logic_df[
|
| 123 |
+
[
|
| 124 |
+
"avg_prb_usage_bh_l800",
|
| 125 |
+
"avg_prb_usage_bh_l1800",
|
| 126 |
+
"avg_prb_usage_bh_l2300",
|
| 127 |
+
"avg_prb_usage_bh_l2600",
|
| 128 |
+
"avg_prb_usage_bh_l1800s",
|
| 129 |
+
]
|
| 130 |
+
].count(axis=1)
|
| 131 |
+
|
| 132 |
+
# Number of Down or No KPI cells = num_cells -num_cell_with_kpi
|
| 133 |
+
lte_analysis_logic_df["num_down_or_no_kpi_cells"] = (
|
| 134 |
+
lte_analysis_logic_df["num_cells"] - lte_analysis_logic_df["num_cell_with_kpi"]
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
# Check Max difference between avg_prb_usage_bh_l800 avg_prb_usage_bh_l1800 avg_prb_usage_bh_l2300 avg_prb_usage_bh_l2600 avg_prb_usage_bh_l1800s
|
| 138 |
+
lte_analysis_logic_df["prb_diff_between_cells"] = lte_analysis_logic_df[
|
| 139 |
+
[
|
| 140 |
+
"avg_prb_usage_bh_l800",
|
| 141 |
+
"avg_prb_usage_bh_l1800",
|
| 142 |
+
"avg_prb_usage_bh_l2300",
|
| 143 |
+
"avg_prb_usage_bh_l2600",
|
| 144 |
+
"avg_prb_usage_bh_l1800s",
|
| 145 |
+
]
|
| 146 |
+
].apply(lambda row: max(row) - min(row), axis=1)
|
| 147 |
+
|
| 148 |
+
# Add Load balance required column = Yes if prb_diff_between_cells > prb_diff_between_cells_threshold else No
|
| 149 |
+
lte_analysis_logic_df["load_balance_required"] = lte_analysis_logic_df[
|
| 150 |
+
"prb_diff_between_cells"
|
| 151 |
+
].apply(lambda x: "Yes" if x > prb_diff_between_cells_threshold else "No")
|
| 152 |
+
|
| 153 |
+
# Add Next band column
|
| 154 |
+
lte_analysis_logic_df["next_band"] = lte_analysis_logic_df["site_config_band"].map(
|
| 155 |
+
LteCapacity.next_band_mapping
|
| 156 |
+
)
|
| 157 |
+
|
| 158 |
+
# Add congestion comments
|
| 159 |
+
# if num_congested_cells == 0 and num_down_or_no_kpi_cells == 0 = " No Congestion"
|
| 160 |
+
# if num_congested_cells == 0 and num_down_or_no_kpi_cells > 0 = "No congestion but Down cell"
|
| 161 |
+
# if num_congested_cells > 0 and num_down_or_no_kpi_cells > 0 = "Congestion but Colocated Down Cell"
|
| 162 |
+
# Else Need Action
|
| 163 |
+
conditions = [
|
| 164 |
+
(lte_analysis_logic_df["num_congested_cells"] == 0)
|
| 165 |
+
& (lte_analysis_logic_df["num_down_or_no_kpi_cells"] == 0),
|
| 166 |
+
(lte_analysis_logic_df["num_congested_cells"] == 0)
|
| 167 |
+
& (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
|
| 168 |
+
(lte_analysis_logic_df["num_congested_cells"] > 0)
|
| 169 |
+
& (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
|
| 170 |
+
]
|
| 171 |
+
|
| 172 |
+
choices = [
|
| 173 |
+
"No Congestion",
|
| 174 |
+
"No congestion but Down cell",
|
| 175 |
+
"Congestion but Colocated Down Cell",
|
| 176 |
+
]
|
| 177 |
+
|
| 178 |
+
lte_analysis_logic_df["congestion_comment"] = np.select(
|
| 179 |
+
conditions, choices, default="Need Action"
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
# Add "Actions" column
|
| 183 |
+
# if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Load Balancing parameter tuning required"
|
| 184 |
+
# if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Add Layer"
|
| 185 |
+
# Else keep congestion_comment
|
| 186 |
+
conditions = [
|
| 187 |
+
(lte_analysis_logic_df["load_balance_required"] == "Yes")
|
| 188 |
+
& (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
|
| 189 |
+
(lte_analysis_logic_df["load_balance_required"] == "No")
|
| 190 |
+
& (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
|
| 191 |
+
]
|
| 192 |
+
|
| 193 |
+
choices = [
|
| 194 |
+
"Load Balancing parameter tuning required",
|
| 195 |
+
"Add Layer",
|
| 196 |
+
]
|
| 197 |
+
|
| 198 |
+
lte_analysis_logic_df["actions"] = np.select(
|
| 199 |
+
conditions, choices, default=lte_analysis_logic_df["congestion_comment"]
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# Add Final Comments
|
| 203 |
+
# if "actions" = "Add Layer" then "'Add' + 'next_band''
|
| 204 |
+
# Else keep "actions" as it is
|
| 205 |
+
lte_analysis_logic_df["final_comments"] = lte_analysis_logic_df.apply(
|
| 206 |
+
lambda row: (
|
| 207 |
+
f"Add {row['next_band']}"
|
| 208 |
+
if row["actions"] == "Add Layer"
|
| 209 |
+
else row["actions"]
|
| 210 |
+
),
|
| 211 |
+
axis=1,
|
| 212 |
+
)
|
| 213 |
+
|
| 214 |
+
# create column "sector" equal to conteent of "LNCEL_name_l800" if not empty else "LNCEL_name_l1800" if not empty else "LNCEL_name_l2300"
|
| 215 |
+
lte_analysis_logic_df["sector"] = (
|
| 216 |
+
lte_analysis_logic_df["LNCEL_name_l800"]
|
| 217 |
+
.combine_first(lte_analysis_logic_df["LNCEL_name_l1800"])
|
| 218 |
+
.combine_first(lte_analysis_logic_df["LNCEL_name_l2300"])
|
| 219 |
+
.combine_first(lte_analysis_logic_df["LNCEL_name_l2600"])
|
| 220 |
+
.combine_first(lte_analysis_logic_df["LNCEL_name_l1800s"])
|
| 221 |
+
)
|
| 222 |
+
# remove rows where sector is empty
|
| 223 |
+
lte_analysis_logic_df = lte_analysis_logic_df[
|
| 224 |
+
lte_analysis_logic_df["sector"].notna()
|
| 225 |
+
]
|
| 226 |
+
# Add sector_id column if sector contains : '_1_" then 1 elif sector contains : '_2_" then 2 elif sector contains : '_3_" then 3
|
| 227 |
+
lte_analysis_logic_df["sector_id"] = np.where(
|
| 228 |
+
lte_analysis_logic_df["sector"].str.contains("_1_"),
|
| 229 |
+
1,
|
| 230 |
+
np.where(
|
| 231 |
+
lte_analysis_logic_df["sector"].str.contains("_2_"),
|
| 232 |
+
2,
|
| 233 |
+
np.where(lte_analysis_logic_df["sector"].str.contains("_3_"), 3, np.nan),
|
| 234 |
+
),
|
| 235 |
+
)
|
| 236 |
+
# add code_sector column by combine code and sector_id
|
| 237 |
+
lte_analysis_logic_df["code_sector"] = (
|
| 238 |
+
lte_analysis_logic_df["code"].astype(str)
|
| 239 |
+
+ "_"
|
| 240 |
+
+ lte_analysis_logic_df["sector_id"].astype(str)
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
# remove '.0' from code_sector
|
| 244 |
+
lte_analysis_logic_df["code_sector"] = lte_analysis_logic_df[
|
| 245 |
+
"code_sector"
|
| 246 |
+
].str.replace(".0", "")
|
| 247 |
+
|
| 248 |
+
# lte_analysis_logic_df = lte_analysis_logic_df[LTE_ANALYSIS_COLUMNS]
|
| 249 |
+
return lte_analysis_logic_df
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
|
| 253 |
+
# Base DataFrame with unique codes, Region, and site_config_band
|
| 254 |
+
all_codes_df = df[
|
| 255 |
+
["code", "Region", "site_config_band", "Longitude", "Latitude"]
|
| 256 |
+
].drop_duplicates()
|
| 257 |
+
|
| 258 |
+
# Configuration for sector groups and their respective LNCEL patterns and column suffixes
|
| 259 |
+
# Format: { "group_key": [(lncel_name_pattern_part, column_suffix), ...] }
|
| 260 |
+
# lncel_name_pattern_part will be combined with "_<group_key>" or similar
|
| 261 |
+
# Example: for group "1", pattern "_1_L800" gives suffix "l800"
|
| 262 |
+
sector_groups_config = {
|
| 263 |
+
"1": [
|
| 264 |
+
("_1_L800", "l800"),
|
| 265 |
+
("_1_L1800", "l1800"),
|
| 266 |
+
("_1_L2300", "l2300"),
|
| 267 |
+
("_1_L2600", "l2600"),
|
| 268 |
+
("_1S_L1800", "l1800s"),
|
| 269 |
+
],
|
| 270 |
+
"2": [
|
| 271 |
+
("_2_L800", "l800"),
|
| 272 |
+
("_2_L1800", "l1800"),
|
| 273 |
+
("_2_L2300", "l2300"),
|
| 274 |
+
("_2_L2600", "l2600"),
|
| 275 |
+
("_2S_L1800", "l1800s"),
|
| 276 |
+
],
|
| 277 |
+
"3": [
|
| 278 |
+
("_3_L800", "l800"),
|
| 279 |
+
("_3_L1800", "l1800"),
|
| 280 |
+
("_3_L2300", "l2300"),
|
| 281 |
+
("_3_L2600", "l2600"),
|
| 282 |
+
("_3S_L1800", "l1800s"),
|
| 283 |
+
],
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
all_processed_sectors_dfs = []
|
| 287 |
+
|
| 288 |
+
for sector_group_key, band_configurations in sector_groups_config.items():
|
| 289 |
+
# Start with the base DataFrame for the current sector group
|
| 290 |
+
current_sector_group_df = all_codes_df.copy()
|
| 291 |
+
|
| 292 |
+
for lncel_name_pattern, column_suffix in band_configurations:
|
| 293 |
+
# Filter the original DataFrame for the current LNCEL pattern
|
| 294 |
+
# The pattern assumes LNCEL_name contains something like "SITENAME<lncel_name_pattern>"
|
| 295 |
+
filtered_band_df = df[df["LNCEL_name"].str.contains(lncel_name_pattern)]
|
| 296 |
+
|
| 297 |
+
# Select relevant columns and rename them for the merge
|
| 298 |
+
# This avoids pandas automatically adding _x, _y suffixes and then needing to rename them
|
| 299 |
+
df_to_merge = filtered_band_df[
|
| 300 |
+
[
|
| 301 |
+
"code",
|
| 302 |
+
"LNCEL_name",
|
| 303 |
+
"avg_prb_usage_bh",
|
| 304 |
+
"avg_prb_usage_bh_2nd",
|
| 305 |
+
"avg_act_ues",
|
| 306 |
+
"avg_dl_thp",
|
| 307 |
+
"avg_ul_thp",
|
| 308 |
+
]
|
| 309 |
+
].rename(
|
| 310 |
+
columns={
|
| 311 |
+
"LNCEL_name": f"LNCEL_name_{column_suffix}",
|
| 312 |
+
"avg_prb_usage_bh": f"avg_prb_usage_bh_{column_suffix}",
|
| 313 |
+
"avg_prb_usage_bh_2nd": f"avg_prb_usage_bh_{column_suffix}_2nd",
|
| 314 |
+
"avg_act_ues": f"avg_act_ues_{column_suffix}",
|
| 315 |
+
"avg_dl_thp": f"avg_dl_thp_{column_suffix}",
|
| 316 |
+
"avg_ul_thp": f"avg_ul_thp_{column_suffix}",
|
| 317 |
+
}
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
# Perform a left merge
|
| 321 |
+
current_sector_group_df = pd.merge(
|
| 322 |
+
current_sector_group_df, df_to_merge, on="code", how="left"
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
all_processed_sectors_dfs.append(current_sector_group_df)
|
| 326 |
+
|
| 327 |
+
# Concatenate all the processed sector DataFrames
|
| 328 |
+
all_sectors_dfs = pd.concat(all_processed_sectors_dfs, axis=0, ignore_index=True)
|
| 329 |
+
# save_dataframe(all_sectors_dfs, "all_sectors_dfs.csv")
|
| 330 |
+
|
| 331 |
+
return all_sectors_dfs
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def lte_database_for_capacity(dump_path: str):
|
| 335 |
+
dfs = process_lte_data(dump_path)
|
| 336 |
+
lte_fdd = dfs[0]
|
| 337 |
+
lte_tdd = dfs[1]
|
| 338 |
+
|
| 339 |
+
lte_fdd = lte_fdd[LTE_DATABASE_COLUMNS]
|
| 340 |
+
lte_tdd = lte_tdd[LTE_DATABASE_COLUMNS]
|
| 341 |
+
|
| 342 |
+
lte_db = pd.concat([lte_fdd, lte_tdd], axis=0)
|
| 343 |
+
|
| 344 |
+
# rename final_name to LNCEL_name
|
| 345 |
+
lte_db = lte_db.rename(columns={"final_name": "LNCEL_name"})
|
| 346 |
+
|
| 347 |
+
# save_dataframe(lte_db, "LTE_Database.csv")
|
| 348 |
+
return lte_db
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def lte_bh_dfs_per_kpi(
|
| 352 |
+
dump_path: str,
|
| 353 |
+
df: pd.DataFrame,
|
| 354 |
+
number_of_kpi_days: int = 7,
|
| 355 |
+
availability_threshold: int = 95,
|
| 356 |
+
prb_usage_threshold: int = 80,
|
| 357 |
+
prb_diff_between_cells_threshold: int = 20,
|
| 358 |
+
number_of_threshold_days: int = 3,
|
| 359 |
+
main_prb_to_use: str = "",
|
| 360 |
+
) -> pd.DataFrame:
|
| 361 |
+
|
| 362 |
+
# print(df.columns)
|
| 363 |
+
|
| 364 |
+
pivoted_kpi_dfs = create_dfs_per_kpi(
|
| 365 |
+
df=df,
|
| 366 |
+
pivot_date_column="date",
|
| 367 |
+
pivot_name_column="LNCEL_name",
|
| 368 |
+
kpi_columns_from=2,
|
| 369 |
+
)
|
| 370 |
+
cell_availability_df = cell_availability_analysis(
|
| 371 |
+
df=pivoted_kpi_dfs["Cell_Avail_excl_BLU"],
|
| 372 |
+
days=number_of_kpi_days,
|
| 373 |
+
availability_threshold=availability_threshold,
|
| 374 |
+
)
|
| 375 |
+
prb_usage_df = analyze_prb_usage(
|
| 376 |
+
df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
|
| 377 |
+
number_of_kpi_days=number_of_kpi_days,
|
| 378 |
+
prb_usage_threshold=prb_usage_threshold,
|
| 379 |
+
analysis_type="BH",
|
| 380 |
+
number_of_threshold_days=number_of_threshold_days,
|
| 381 |
+
suffix="" if main_prb_to_use == "E-UTRAN Avg PRB usage per TTI DL" else "_2nd",
|
| 382 |
+
)
|
| 383 |
+
prb_lev10_usage_df = analyze_prb_usage(
|
| 384 |
+
df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
|
| 385 |
+
number_of_kpi_days=number_of_kpi_days,
|
| 386 |
+
prb_usage_threshold=prb_usage_threshold,
|
| 387 |
+
analysis_type="BH",
|
| 388 |
+
number_of_threshold_days=number_of_threshold_days,
|
| 389 |
+
suffix="" if main_prb_to_use == "DL PRB Util p TTI Lev_10" else "_2nd",
|
| 390 |
+
)
|
| 391 |
+
act_ues_df = pivoted_kpi_dfs["Avg_act_UEs_DL"]
|
| 392 |
+
# Add Max and avg columns for act_ues_df
|
| 393 |
+
act_ues_df["max_act_ues"] = act_ues_df.max(axis=1)
|
| 394 |
+
act_ues_df["avg_act_ues"] = act_ues_df.mean(axis=1)
|
| 395 |
+
dl_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_DL"]
|
| 396 |
+
# Add Max and avg columns for dl_thp_df
|
| 397 |
+
dl_thp_df["max_dl_thp"] = dl_thp_df.max(axis=1)
|
| 398 |
+
dl_thp_df["avg_dl_thp"] = dl_thp_df.mean(axis=1)
|
| 399 |
+
ul_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_UL"]
|
| 400 |
+
# Add Max and avg columns for ul_thp_df
|
| 401 |
+
ul_thp_df["max_ul_thp"] = ul_thp_df.max(axis=1)
|
| 402 |
+
ul_thp_df["avg_ul_thp"] = ul_thp_df.mean(axis=1)
|
| 403 |
+
|
| 404 |
+
bh_kpi_df = pd.concat(
|
| 405 |
+
[
|
| 406 |
+
cell_availability_df,
|
| 407 |
+
prb_lev10_usage_df,
|
| 408 |
+
prb_usage_df,
|
| 409 |
+
act_ues_df,
|
| 410 |
+
dl_thp_df,
|
| 411 |
+
ul_thp_df,
|
| 412 |
+
],
|
| 413 |
+
axis=1,
|
| 414 |
+
)
|
| 415 |
+
bh_kpi_df = bh_kpi_df.reset_index()
|
| 416 |
+
prb_df = bh_kpi_df[PRB_COLUMNS]
|
| 417 |
+
|
| 418 |
+
# drop row if lnCEL_name is empty or 1
|
| 419 |
+
prb_df = prb_df[prb_df["LNCEL_name"].str.len() > 3]
|
| 420 |
+
# prb_df = prb_df.reset_index()
|
| 421 |
+
prb_df = prb_df.droplevel(level=1, axis=1) # Drop the first level (date)
|
| 422 |
+
# prb_df = prb_df.reset_index()
|
| 423 |
+
# prb_df["code"] = prb_df["LNCEL_name"].str.split("_").str[0]
|
| 424 |
+
|
| 425 |
+
lte_db = lte_database_for_capacity(dump_path)
|
| 426 |
+
|
| 427 |
+
db_and_prb = pd.merge(lte_db, prb_df, on="LNCEL_name", how="left")
|
| 428 |
+
|
| 429 |
+
# if avg_prb_usage_bh is "" then set it to "cell exists in dump but not in BH report"
|
| 430 |
+
# db_and_prb.loc[db_and_prb["avg_prb_usage_bh"].isnull(), "avg_prb_usage_bh"] = (
|
| 431 |
+
# "cell exists in dump but not in BH report"
|
| 432 |
+
# )
|
| 433 |
+
# drop row if lnCEL_name is empty or 1
|
| 434 |
+
db_and_prb = db_and_prb[db_and_prb["LNCEL_name"].str.len() > 3]
|
| 435 |
+
|
| 436 |
+
lte_analysis_df = dfs_per_band_cell(db_and_prb)
|
| 437 |
+
lte_analysis_df = lte_analysis_logic(
|
| 438 |
+
lte_analysis_df,
|
| 439 |
+
prb_usage_threshold,
|
| 440 |
+
prb_diff_between_cells_threshold,
|
| 441 |
+
)
|
| 442 |
+
|
| 443 |
+
lte_analysis_df = lte_analysis_df[LTE_ANALYSIS_COLUMNS]
|
| 444 |
+
# Rename columns
|
| 445 |
+
lte_analysis_df = lte_analysis_df.rename(
|
| 446 |
+
columns={
|
| 447 |
+
"LNCEL_name_l800": "name_l800",
|
| 448 |
+
"LNCEL_name_l1800": "name_l1800",
|
| 449 |
+
"LNCEL_name_l2300": "name_l2300",
|
| 450 |
+
"LNCEL_name_l2600": "name_l2600",
|
| 451 |
+
"LNCEL_name_l1800s": "name_l1800s",
|
| 452 |
+
"avg_prb_usage_bh_l800": "prb_l800",
|
| 453 |
+
"avg_prb_usage_bh_l1800": "prb_l1800",
|
| 454 |
+
"avg_prb_usage_bh_l2300": "prb_l2300",
|
| 455 |
+
"avg_prb_usage_bh_l2600": "prb_l2600",
|
| 456 |
+
"avg_prb_usage_bh_l1800s": "prb_l1800s",
|
| 457 |
+
"avg_prb_usage_bh_l800_2nd": "prb_l800_2nd",
|
| 458 |
+
"avg_prb_usage_bh_l1800_2nd": "prb_l1800_2nd",
|
| 459 |
+
"avg_prb_usage_bh_l2300_2nd": "prb_l2300_2nd",
|
| 460 |
+
"avg_prb_usage_bh_l2600_2nd": "prb_l2600_2nd",
|
| 461 |
+
"avg_prb_usage_bh_l1800s_2nd": "prb_l1800s_2nd",
|
| 462 |
+
"avg_act_ues_l800": "act_ues_l800",
|
| 463 |
+
"avg_act_ues_l1800": "act_ues_l1800",
|
| 464 |
+
"avg_act_ues_l2300": "act_ues_l2300",
|
| 465 |
+
"avg_act_ues_l2600": "act_ues_l2600",
|
| 466 |
+
"avg_act_ues_l1800s": "act_ues_l1800s",
|
| 467 |
+
"avg_dl_thp_l800": "dl_thp_l800",
|
| 468 |
+
"avg_dl_thp_l1800": "dl_thp_l1800",
|
| 469 |
+
"avg_dl_thp_l2300": "dl_thp_l2300",
|
| 470 |
+
"avg_dl_thp_l2600": "dl_thp_l2600",
|
| 471 |
+
"avg_dl_thp_l1800s": "dl_thp_l1800s",
|
| 472 |
+
"avg_ul_thp_l800": "ul_thp_l800",
|
| 473 |
+
"avg_ul_thp_l1800": "ul_thp_l1800",
|
| 474 |
+
"avg_ul_thp_l2300": "ul_thp_l2300",
|
| 475 |
+
"avg_ul_thp_l2600": "ul_thp_l2600",
|
| 476 |
+
"avg_ul_thp_l1800s": "ul_thp_l1800s",
|
| 477 |
+
}
|
| 478 |
+
)
|
| 479 |
+
|
| 480 |
+
return [bh_kpi_df, lte_analysis_df]
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
def process_lte_bh_report(
|
| 484 |
+
dump_path: str,
|
| 485 |
+
bh_report_path: str,
|
| 486 |
+
num_last_days: int,
|
| 487 |
+
num_threshold_days: int,
|
| 488 |
+
availability_threshold: float,
|
| 489 |
+
prb_usage_threshold: float,
|
| 490 |
+
prb_diff_between_cells_threshold: float,
|
| 491 |
+
main_prb_to_use: str,
|
| 492 |
+
) -> dict:
|
| 493 |
+
"""
|
| 494 |
+
Process LTE Busy Hour report and perform capacity analysis
|
| 495 |
+
|
| 496 |
+
Args:
|
| 497 |
+
bh_report_path: Path to BH report CSV file
|
| 498 |
+
num_last_days: Number of last days for analysis
|
| 499 |
+
num_threshold_days: Number of days for threshold calculation
|
| 500 |
+
availability_threshold: Minimum required availability
|
| 501 |
+
prb_usage_threshold: Maximum allowed PRB usage
|
| 502 |
+
prb_diff_between_cells_threshold: Maximum allowed PRB usage difference between cells
|
| 503 |
+
|
| 504 |
+
Returns:
|
| 505 |
+
Dictionary containing analysis results and DataFrames
|
| 506 |
+
"""
|
| 507 |
+
LteCapacity.final_results = None
|
| 508 |
+
# lte_db_dfs = lte_database_for_capacity(dump_path)
|
| 509 |
+
|
| 510 |
+
# Read BH report
|
| 511 |
+
df = pd.read_csv(bh_report_path, delimiter=";")
|
| 512 |
+
df = kpi_naming_cleaning(df)
|
| 513 |
+
# print(df.columns)
|
| 514 |
+
df = create_hourly_date(df)
|
| 515 |
+
df = df[KPI_COLUMNS]
|
| 516 |
+
pivoted_kpi_dfs = lte_bh_dfs_per_kpi(
|
| 517 |
+
dump_path=dump_path,
|
| 518 |
+
df=df,
|
| 519 |
+
number_of_kpi_days=num_last_days,
|
| 520 |
+
availability_threshold=availability_threshold,
|
| 521 |
+
prb_usage_threshold=prb_usage_threshold,
|
| 522 |
+
prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
|
| 523 |
+
number_of_threshold_days=num_threshold_days,
|
| 524 |
+
main_prb_to_use=main_prb_to_use,
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
# save_dataframe(pivoted_kpi_dfs, "LTE_BH_Report.csv")
|
| 528 |
+
return pivoted_kpi_dfs
|
process_kpi/process_wbts_capacity.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from utils.kpi_analysis_utils import (
|
| 4 |
+
cell_availability_analysis,
|
| 5 |
+
combine_comments,
|
| 6 |
+
create_daily_date,
|
| 7 |
+
create_dfs_per_kpi,
|
| 8 |
+
kpi_naming_cleaning,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class WbtsCapacity:
|
| 13 |
+
final_results: pd.DataFrame = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str:
|
| 17 |
+
"""
|
| 18 |
+
Check if any value in the row deviates more than max_diff from the most common value.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
row: Series of values to check for deviation
|
| 22 |
+
max_diff: Maximum allowed difference from the most common value
|
| 23 |
+
type: Type identifier for the deviation message
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
A message indicating deviation if found, otherwise an empty string
|
| 27 |
+
"""
|
| 28 |
+
numeric_row = row.astype(float) # Ensure numeric
|
| 29 |
+
mode_series = numeric_row.mode()
|
| 30 |
+
|
| 31 |
+
# Safe fallback in case mode is empty
|
| 32 |
+
most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0]
|
| 33 |
+
|
| 34 |
+
diffs = abs(numeric_row - most_common)
|
| 35 |
+
|
| 36 |
+
if (diffs > max_diff).any():
|
| 37 |
+
return f"{type} Deviation > {max_diff} detected"
|
| 38 |
+
else:
|
| 39 |
+
return ""
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def max_used_bb_subunits_analysis(
|
| 43 |
+
df: pd.DataFrame,
|
| 44 |
+
days: int = 7,
|
| 45 |
+
threshold: int = 80,
|
| 46 |
+
number_of_threshold_days: int = 3,
|
| 47 |
+
) -> pd.DataFrame:
|
| 48 |
+
"""
|
| 49 |
+
Analyze maximum used baseband subunits and identify sites needing upgrades.
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
df: DataFrame containing baseband utilization data
|
| 53 |
+
days: Number of days to analyze
|
| 54 |
+
threshold: Utilization threshold percentage for flagging
|
| 55 |
+
number_of_threshold_days: Minimum days above threshold to flag for upgrade
|
| 56 |
+
|
| 57 |
+
Returns:
|
| 58 |
+
DataFrame with analysis results and upgrade recommendations
|
| 59 |
+
"""
|
| 60 |
+
result_df = df.copy()
|
| 61 |
+
last_days_df = result_df.iloc[:, -days:]
|
| 62 |
+
last_days_df = last_days_df.fillna(0)
|
| 63 |
+
|
| 64 |
+
result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2)
|
| 65 |
+
# Count the number of days above threshold
|
| 66 |
+
result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply(
|
| 67 |
+
lambda row: sum(1 for x in row if x >= threshold), axis=1
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Initialize comment column
|
| 71 |
+
result_df["Average_used_bb_ratio_comment"] = ""
|
| 72 |
+
|
| 73 |
+
# Apply condition for upgrade recommendation
|
| 74 |
+
result_df.loc[
|
| 75 |
+
(result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
|
| 76 |
+
& (result_df["Average_used_bb_ratio"] >= threshold),
|
| 77 |
+
"Average_used_bb_ratio_comment",
|
| 78 |
+
] = "need BB upgrade"
|
| 79 |
+
|
| 80 |
+
return result_df
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def max_used_ce_analysis(
|
| 84 |
+
df: pd.DataFrame,
|
| 85 |
+
days: int = 7,
|
| 86 |
+
threshold: int = 80,
|
| 87 |
+
number_of_threshold_days: int = 3,
|
| 88 |
+
) -> pd.DataFrame:
|
| 89 |
+
"""
|
| 90 |
+
Analyze maximum used channel elements and identify sites needing upgrades.
|
| 91 |
+
|
| 92 |
+
Args:
|
| 93 |
+
df: DataFrame containing channel element utilization data
|
| 94 |
+
days: Number of days to analyze
|
| 95 |
+
threshold: Utilization threshold percentage for flagging
|
| 96 |
+
number_of_threshold_days: Minimum days above threshold to flag for upgrade
|
| 97 |
+
|
| 98 |
+
Returns:
|
| 99 |
+
DataFrame with analysis results and upgrade recommendations
|
| 100 |
+
"""
|
| 101 |
+
result_df = df.copy().fillna(0)
|
| 102 |
+
last_days_df = result_df.iloc[:, -days:]
|
| 103 |
+
|
| 104 |
+
result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2)
|
| 105 |
+
|
| 106 |
+
# Count the number of days above threshold
|
| 107 |
+
result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply(
|
| 108 |
+
lambda row: sum(1 for x in row if x >= threshold), axis=1
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
# Initialize comment column
|
| 112 |
+
result_df["Average_used_ce_ratio_comment"] = ""
|
| 113 |
+
|
| 114 |
+
# Apply condition for upgrade recommendation
|
| 115 |
+
result_df.loc[
|
| 116 |
+
(result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
|
| 117 |
+
& (result_df["Average_used_ce_ratio"] >= threshold),
|
| 118 |
+
"Average_used_ce_ratio_comment",
|
| 119 |
+
] = "need CE upgrade"
|
| 120 |
+
|
| 121 |
+
return result_df
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame:
    """
    Flag deviations in the baseband sub-unit count over the last *days* days.

    Args:
        df: Pivoted DataFrame whose trailing columns hold daily BB sub-unit
            counts.
        days: Number of trailing day-columns to inspect.

    Returns:
        Copy of *df* with an added "num_bb_subunits_comment" column, produced
        by check_deviation (type="bb") applied to each row.
    """
    analyzed = df.copy()
    recent_days = analyzed.iloc[:, -days:]
    analyzed["num_bb_subunits_comment"] = recent_days.apply(
        lambda daily_values: check_deviation(daily_values, type="bb"), axis=1
    )
    return analyzed
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
    """
    Flag deviations in the licensed/available channel-element counts.

    Args:
        df: Pivoted DataFrame whose trailing columns hold daily CE counts.
        days: Number of trailing day-columns to inspect.

    Returns:
        Copy of *df* with an added "avail_ce_comment" column, produced by
        check_deviation (max_diff=96, type="ce") applied to each row.
    """
    analyzed = df.copy()
    recent_days = analyzed.iloc[:, -days:]
    analyzed["avail_ce_comment"] = recent_days.apply(
        lambda daily_values: check_deviation(daily_values, max_diff=96, type="ce"),
        axis=1,
    )
    return analyzed
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the baseband-related comment columns into one "bb_comments" column.

    Args:
        df: DataFrame containing the three baseband comment columns.

    Returns:
        DataFrame returned by combine_comments with "bb_comments" added.
    """
    source_columns = (
        "num_bb_subunits_comment",
        "Average_used_bb_ratio_comment",
        "availability_comment_daily",
    )
    return combine_comments(df, *source_columns, new_column="bb_comments")
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the channel-element comment columns into one "ce_comments" column.

    Args:
        df: DataFrame containing the three channel-element comment columns.

    Returns:
        DataFrame returned by combine_comments with "ce_comments" added.
    """
    source_columns = (
        "avail_ce_comment",
        "Average_used_ce_ratio_comment",
        "availability_comment_daily",
    )
    return combine_comments(df, *source_columns, new_column="ce_comments")
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def wbts_kpi_analysis(
    df: pd.DataFrame,
    num_days: int = 7,
    threshold: int = 80,
    number_of_threshold_days: int = 3,
) -> pd.DataFrame:
    """
    Create pivoted DataFrames for each KPI and perform the WBTS analysis.

    Args:
        df: Long-format DataFrame containing KPI data (pivoted per "DN" over
            the "date" column; KPI columns start at index 5).
        num_days: Number of trailing days to analyze.
        threshold: Utilization threshold percentage for flagging.
        number_of_threshold_days: Minimum days above threshold to flag for upgrade.

    Returns:
        DataFrame with the per-KPI pivots concatenated side by side plus the
        combined "bb_comments" and "ce_comments" columns.
    """
    # kpi_columns = df.columns[5:]
    pivoted_kpi_dfs = {}  # NOTE(review): immediately overwritten below.

    # One pivoted (site x date) DataFrame per KPI column, keyed by KPI name.
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5
    )

    # Extract individual KPI DataFrames
    wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
    licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"]
    max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"]
    max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"]
    max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"]
    max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"]
    num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"]
    max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"]
    cell_availability_df = pivoted_kpi_dfs[
        "Cell_Availability_excluding_blocked_by_user_state_BLU"
    ]
    total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
    total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"]
    max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"]

    # Perform analysis on each KPI DataFrame (each call appends its own
    # average/count/comment columns to the corresponding pivot).
    max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis(
        max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days
    )
    cell_availability_df = cell_availability_analysis(cell_availability_df, num_days)
    max_used_ce_ratio_flexi_df = max_used_ce_analysis(
        max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days
    )
    num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days)
    licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days)

    # Concatenate all DataFrames side by side; the order here fixes the
    # column order of the final report.
    result_df = pd.concat(
        [
            wbts_name_df,
            licensed_ce_df,
            max_used_ce_dl_df,
            max_used_ce_ul_df,
            max_avail_ce_df,
            max_used_bb_subunits_df,
            num_bb_subunits_df,
            max_bb_sus_util_ratio_df,
            cell_availability_df,
            total_cs_traffic_df,
            total_data_traffic_df,
            max_used_ce_ratio_flexi_df,
        ],
        axis=1,
    )

    # Add combined comments analysis (merges the per-KPI comment columns).
    result_df = bb_comments_analysis(result_df)
    result_df = ce_comments_analysis(result_df)

    return result_df
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def load_data(
    filepath: str,
    num_days: int,
    threshold: int,
    number_of_threshold_days: int,
) -> pd.DataFrame:
    """
    Load the WBTS capacity CSV and run preprocessing plus the KPI analysis.

    Args:
        filepath: Path to a semicolon-delimited CSV file (or file-like object).
        num_days: Number of trailing days to analyze.
        threshold: Utilization threshold percentage for flagging.
        number_of_threshold_days: Minimum days above threshold to flag for
            upgrade.

    Returns:
        DataFrame with the processed and analyzed WBTS data.
    """
    data = pd.read_csv(filepath, delimiter=";")

    # Preprocess: derive the daily date column, then normalise KPI names.
    data = create_daily_date(data)
    data = kpi_naming_cleaning(data)

    # Reorder columns: "date" first, "WBTS_name" last.
    data = data[["date"] + [column for column in data.columns if column != "date"]]
    data = data[
        [column for column in data.columns if column != "WBTS_name"] + ["WBTS_name"]
    ]

    # Perform KPI analysis and return the combined result.
    return wbts_kpi_analysis(data, num_days, threshold, number_of_threshold_days)
|
process_kpi/process_wcel_capacity.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from utils.kpi_analysis_utils import (
|
| 4 |
+
analyze_fails_kpi,
|
| 5 |
+
cell_availability_analysis,
|
| 6 |
+
combine_comments,
|
| 7 |
+
create_daily_date,
|
| 8 |
+
create_dfs_per_kpi,
|
| 9 |
+
kpi_naming_cleaning,
|
| 10 |
+
summarize_fails_comments,
|
| 11 |
+
)
|
| 12 |
+
from utils.utils_vars import get_physical_db
|
| 13 |
+
|
| 14 |
+
# Maps the raw combined TX-congestion comment strings (as produced by
# combine_comments on the per-KPI comment columns) to short readable labels.
tx_comments_mapping = {
    "iub_frameloss exceeded threshold": "iub frameloss",
    "iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion",
    "hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion",
}
# Maps combined "TX congestion + availability" comment strings to a single
# operational verdict per cell.
operational_comments_mapping = {
    "Down Site": "Down Cell",
    "iub frameloss, instability": "Availability and TX issues",
    "iub frameloss and hsdpa iub congestion, Availability OK": "TX issues",
    "iub frameloss, Availability OK": "TX issues",
    "critical instability": "Availability issues",
    "iub frameloss, critical instability": "Availability and TX issues",
    "iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues",
    "Availability OK": "Site OK",
    "hsdpa iub congestion, instability": "Availability and TX issues",
    "instability": "Availability issues",
    "hsdpa iub congestion, Availability OK": "TX issues",
    "iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues",
    "hsdpa iub congestion, critical instability": "Availability and TX issues",
}

# Maps summarized RRC-setup-failure comment strings (output of
# summarize_fails_comments) to grouped failure-cause labels.
fails_comments_mapping = {
    "ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
    "bts fails": "Bts fails",
    "ac, bts, code fails": "Power and Code fails",
    "ac, code fails": "Power fails",
    "ac fails": "Power fails",
    "ac, ac_dl fails": "Power fails",
    "ac, bts fails": "Power and Bts fails",
    "ac, ac_dl, bts fails": "Power and Bts fails",
    "ac, ac_dl, code fails": "Power and Code fails",
    "ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
    "ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
}

# Columns kept from the raw WCEL CSV export: identifier, date and the KPI
# counters consumed by wcel_kpi_analysis.
KPI_COLUMNS = [
    "WCEL_name",
    "date",
    "Cell_Availability_excluding_blocked_by_user_state_BLU",
    "Total_CS_traffic_Erl",
    "HSDPA_TRAFFIC_VOLUME",
    "HSDPA_USER_THROUGHPUT",
    "Max_simult_HSDPA_users",
    "IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71",
    "HSDPA_congestion_rate_in_Iub",
    "rrc_conn_stp_fail_ac_M1001C3",
    "RRC_CONN_STP_FAIL_AC_UL_M1001C731",
    "RRC_CONN_STP_FAIL_AC_DL_M1001C732",
    "RRC_CONN_STP_FAIL_AC_COD_M1001C733",
    "rrc_conn_stp_fail_bts_M1001C4",
]

# Columns (in display order) selected from the combined KPI frame to build
# the per-cell analysis summary in wcel_kpi_analysis.
WCEL_ANALYSIS_COLUMNS = [
    "WCEL_name",
    "Average_cell_availability_daily",
    "number_of_days_exceeding_availability_threshold_daily",
    "availability_comment_daily",
    "sum_traffic_cs",
    "sum_traffic_dl",
    "max_dl_throughput",
    "avg_dl_throughput",
    "max_users",
    "max_iub_frameloss",
    "number_of_days_with_iub_frameloss_exceeded",
    "max_hsdpa_congestion_rate_iub",
    "number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
    "max_rrc_fail_ac",
    "number_of_days_with_rrc_fail_ac_exceeded",
    "max_rrc_fail_ac_ul",
    "number_of_days_with_rrc_fail_ac_ul_exceeded",
    "max_rrc_fail_ac_dl",
    "number_of_days_with_rrc_fail_ac_dl_exceeded",
    "max_rrc_fail_code",
    "number_of_days_with_rrc_fail_code_exceeded",
    "max_rrc_fail_bts",
    "number_of_days_with_rrc_fail_bts_exceeded",
    "tx_congestion_comments",
    "operational_comments",
    "fails_comments",
    "final_comments",
]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
class WcelCapacity:
    # Class-level holder for the most recent WCEL analysis output, shared by
    # all users of the class (populated elsewhere; None until a run happens).
    # NOTE(review): the annotation says pd.DataFrame but the default is None —
    # effectively Optional[pd.DataFrame]; confirm consumers handle None.
    final_results: pd.DataFrame = None
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def wcel_kpi_analysis(
    df: pd.DataFrame,
    num_last_days: int,
    num_threshold_days: int,
    availability_threshold: int,
    iub_frameloss_threshold: int,
    hsdpa_congestion_rate_iub_threshold: int,
    fails_treshold: int,
) -> list:
    """
    Run the per-WCEL capacity analysis over pivoted daily KPI data.

    Args:
        df: Long-format frame with "WCEL_name", "date" and the KPI columns
            listed in KPI_COLUMNS.
        num_last_days: Number of trailing days used by every per-KPI analysis.
        num_threshold_days: Minimum number of days above a threshold before a
            KPI is flagged in its comment column.
        availability_threshold: Threshold for the cell-availability analysis.
        iub_frameloss_threshold: Threshold for the Iub frame-loss KPI.
        hsdpa_congestion_rate_iub_threshold: Threshold for the HSDPA Iub
            congestion-rate KPI.
        fails_treshold: Threshold shared by all RRC-setup-failure KPIs.

    Returns:
        A two-element list ``[wcel_analysis_df, kpi_df]``: the flat per-cell
        summary (with code/Region and physical-database location merged in)
        and the full concatenated KPI frame.
    """
    # One pivoted (cell x date) DataFrame per KPI column, keyed by KPI name.
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="WCEL_name",
        kpi_columns_from=2,
    )
    cell_availability_df = cell_availability_analysis(
        df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"],
        days=num_last_days,
        availability_threshold=availability_threshold,
    )

    # Trafics, throughput and max users
    trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
    hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"]
    hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
    max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
    # Row-wise aggregates across the date columns (one value per cell).
    trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
    hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
    hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1)
    max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1)
    # add average of Trafics, throughput and max users
    # NOTE(review): these means run AFTER the max_* columns were appended
    # above, so the aggregate column is included in the average — confirm
    # this is intentional.
    hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean(
        axis=1
    )
    max_simult_hsdpa_users_df["avg_users"] = max_simult_hsdpa_users_df.mean(axis=1)

    # TX Congestion KPIs
    iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"]
    hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"]

    # Threshold analysis per KPI; each call presumably appends the
    # max_<name>/number_of_days_with_<name>_exceeded/<name>_comment columns
    # consumed below (see WCEL_ANALYSIS_COLUMNS) — behavior defined in
    # utils.kpi_analysis_utils.analyze_fails_kpi.
    iub_frameloss_df = analyze_fails_kpi(
        df=iub_frameloss_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=iub_frameloss_threshold,
        kpi_column_name="iub_frameloss",
    )
    hsdpa_congestion_rate_iub_df = analyze_fails_kpi(
        df=hsdpa_congestion_rate_iub_df,
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=hsdpa_congestion_rate_iub_threshold,
        kpi_column_name="hsdpa_congestion_rate_iub",
    )

    # RRC setup failure KPIs (admission control, UL/DL power, code, BTS).
    rrc_conn_stp_fail_ac_df = analyze_fails_kpi(
        df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"],
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=fails_treshold,
        kpi_column_name="rrc_fail_ac",
    )
    rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi(
        df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"],
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=fails_treshold,
        kpi_column_name="rrc_fail_ac_ul",
    )
    rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi(
        df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"],
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=fails_treshold,
        kpi_column_name="rrc_fail_ac_dl",
    )
    rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi(
        df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"],
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=fails_treshold,
        kpi_column_name="rrc_fail_code",
    )
    rrc_conn_stp_fail_bts_df = analyze_fails_kpi(
        df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"],
        number_of_kpi_days=num_last_days,
        number_of_threshold_days=num_threshold_days,
        kpi_threshold=fails_treshold,
        kpi_column_name="rrc_fail_bts",
    )

    # Concatenate everything side by side; this order fixes column order.
    kpi_df = pd.concat(
        [
            cell_availability_df,
            trafic_cs_df,
            hsdpa_traffic_df,
            hsdpa_user_throughput_df,
            max_simult_hsdpa_users_df,
            iub_frameloss_df,
            hsdpa_congestion_rate_iub_df,
            rrc_conn_stp_fail_ac_df,
            rrc_conn_stp_fail_ac_ul_df,
            rrc_conn_stp_fail_ac_dl_df,
            rrc_conn_stp_fail_ac_cod_df,
            rrc_conn_stp_fail_bts_df,
        ],
        axis=1,
    )
    kpi_df = kpi_df.reset_index()

    # Combine the two TX comment columns, then map to short labels.
    kpi_df = combine_comments(
        kpi_df,
        "iub_frameloss_comment",
        "hsdpa_congestion_rate_iub_comment",
        new_column="tx_congestion_comments",
    )
    kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply(
        lambda x: tx_comments_mapping.get(x, x)
    )

    # TX + availability -> operational verdict.
    kpi_df = combine_comments(
        kpi_df,
        "tx_congestion_comments",
        "availability_comment_daily",
        new_column="operational_comments",
    )
    kpi_df["operational_comments"] = kpi_df["operational_comments"].apply(
        lambda x: operational_comments_mapping.get(x, x)
    )
    # All five RRC failure comments -> one summarized/grouped label.
    kpi_df = combine_comments(
        kpi_df,
        "rrc_fail_ac_comment",
        "rrc_fail_ac_ul_comment",
        "rrc_fail_ac_dl_comment",
        "rrc_fail_code_comment",
        "rrc_fail_bts_comment",
        new_column="fails_comments",
    )
    kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
    kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
        lambda x: fails_comments_mapping.get(x, x)
    )
    kpi_df = combine_comments(
        kpi_df,
        "operational_comments",
        "fails_comments",
        new_column="final_comments",
    )

    # Flat per-cell summary; drop the second (date) level of the pivoted
    # column MultiIndex.
    wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
    wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)

    # Rename to shorter report-friendly column names.
    wcel_analysis_df = wcel_analysis_df.rename(
        columns={
            "WCEL_name": "name",
            "Average_cell_availability_daily": "Avg_availability",
            "number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
            "availability_comment_daily": "availability_comment",
            "number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
            "number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
            "number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
            "number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
            "number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
            "number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
            "number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
        }
    )
    # remove row if name less than 5 characters
    wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]

    # Derive site code (numeric, non-numeric -> 0) and Region from the
    # "<code>_<region>_..." naming convention.
    wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
    wcel_analysis_df["code"] = (
        pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
    )
    wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
    # move code to the first column
    wcel_analysis_df = wcel_analysis_df[
        ["code", "Region"]
        + [col for col in wcel_analysis_df if col != "code" and col != "Region"]
    ]

    # Load physical database
    physical_db: pd.DataFrame = get_physical_db()

    # Convert code_sector to code
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    # remove duplicates
    physical_db = physical_db.drop_duplicates(subset="code")

    # keep only code and longitude and latitude
    physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]

    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )

    # Attach location info to the summary (left join keeps all cells).
    wcel_analysis_df = pd.merge(
        wcel_analysis_df,
        physical_db,
        on="code",
        how="left",
    )

    return [wcel_analysis_df, kpi_df]
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def load_and_process_wcel_capacity_data(
    uploaded_file: pd.DataFrame,
    num_last_days: int,
    num_threshold_days: int,
    availability_threshold: int,
    iub_frameloss_threshold: int,
    hsdpa_congestion_rate_iub_threshold: int,
    fails_treshold: int,
) -> list:
    """
    Load and process data for WCEL capacity analysis.

    Args:
        uploaded_file: Uploaded semicolon-delimited CSV containing WCEL
            capacity data.  NOTE(review): annotated pd.DataFrame but passed
            straight to pd.read_csv — presumably a path or file-like object;
            confirm at call sites.
        num_last_days: Number of days for analysis
        num_threshold_days: Minimum days above threshold to flag for upgrade
        availability_threshold: Threshold for the availability analysis
        iub_frameloss_threshold: Threshold for the Iub frame-loss KPI
        hsdpa_congestion_rate_iub_threshold: Threshold for the HSDPA Iub
            congestion-rate KPI
        fails_treshold: Threshold for the RRC-setup-failure KPIs

    Returns:
        Two-element list ``[wcel_analysis_df, kpi_df]`` as produced by
        wcel_kpi_analysis.
    """
    # Load data, normalise KPI names and derive the daily date column.
    df = pd.read_csv(uploaded_file, delimiter=";")
    df = kpi_naming_cleaning(df)
    df = create_daily_date(df)
    # Keep only the identifier/date/KPI columns used by the analysis.
    df = df[KPI_COLUMNS]
    dfs = wcel_kpi_analysis(
        df,
        num_last_days,
        num_threshold_days,
        availability_threshold,
        iub_frameloss_threshold,
        hsdpa_congestion_rate_iub_threshold,
        fails_treshold,
    )
    return dfs
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
panel>=1.4
|
| 2 |
+
bokeh>=3.4
|
| 3 |
+
pandas>=2.0
|
| 4 |
+
numpy>=1.23
|
| 5 |
+
plotly>=5.0
|
| 6 |
+
xlsxwriter>=3.0
|
| 7 |
+
|
| 8 |
+
pyarrow>=14.0
|
| 9 |
+
duckdb>=0.9
|
| 10 |
+
openpyxl>=3.1
|
utils/azimuth_validation.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
from utils.convert_to_excel import save_dataframe
|
| 4 |
+
|
| 5 |
+
url = r"./physical_db/physical_database.csv"
|
| 6 |
+
|
| 7 |
+
df = pd.read_csv(url)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def validate_azimuth(group):
    """
    Validate the azimuth ordering within a site group.

    Checks that the azimuth values are strictly increasing when the group
    has exactly three values, i.e. Sector 3 > Sector 2 > Sector 1.  Groups
    with any other number of sectors are considered valid.

    Args:
        group (pd.DataFrame): A DataFrame group expected to contain an
            'Azimut' column.

    Returns:
        bool: False when there are exactly three azimuths and they are not
        strictly increasing; True otherwise (including a missing column).
    """
    # Bug fix: the original used group.get("Azimut", []).values, which raises
    # AttributeError when the column is absent (a plain list has no .values).
    if "Azimut" not in group:
        return True
    azimuths = group["Azimut"].values
    if len(azimuths) == 3 and not (azimuths[0] < azimuths[1] < azimuths[2]):
        return False
    return True
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# Apply validation per 'code': groupby().apply yields a True/False Series
# indexed by CODE, which is then mapped back onto every row of df.
azimut_verification = df.groupby("CODE").apply(lambda x: validate_azimuth(x))
df["Azimut_verification"] = df["CODE"].map(azimut_verification)

# Persist the annotated physical database (runs as a side effect at import).
save_dataframe(df, "azimut_verification")
# print(df)
|
utils/check_sheet_exist.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class DumpType:
    # Module-level flag: True when the inspected workbook is a full
    # configuration dump.  Reset by execute_checks_sheets_exist().
    full_dump = False


class Technology:
    # Per-technology availability flags.  check_sheets() sets a flag to True
    # when every sheet required for that technology exists in the workbook;
    # execute_checks_sheets_exist() resets them all to False first.
    gsm = False
    wcdma = False
    lte = False
    neighbors = False
    trx = False
    mrbts = False
    mal = False
    invunit = False


# Dictionary of sheet groups to check: Technology attribute name -> list of
# sheet names that must ALL be present for that flag to be set.
sheets_to_check = {
    "gsm": ["BTS", "BCF", "TRX", "MAL"],
    "neighbors": ["ADCE", "ADJS", "ADJI", "ADJG", "ADJW", "BTS", "WCEL"],
    "wcdma": ["WCEL", "WBTS", "WNCEL"],
    "lte": ["LNBTS", "LNCEL", "LNCEL_FDD", "LNCEL_TDD"],
    "trx": ["TRX", "BTS"],
    "mrbts": ["MRBTS"],
    "mal": ["MAL", "BTS"],
    "invunit": ["INVUNIT"],
}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def load(file_path):
    """Return the list of sheet names contained in the Excel workbook."""
    # NOTE(review): engine="calamine" requires the python-calamine backend,
    # which is not listed in requirements.txt — confirm it is installed.
    xlsb_file = pd.ExcelFile(file_path, engine="calamine")

    # Get all sheet names in the file
    available_sheets = xlsb_file.sheet_names
    return available_sheets
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def check_sheets(technology_attr, sheet_list, file_path):
    """
    Check whether every sheet in *sheet_list* exists in the Excel workbook
    and, if so, mark the corresponding technology as available.

    Parameters
    ----------
    technology_attr : str
        The attribute of the Technology class to set to True when all the
        required sheets are present.
    sheet_list : list[str]
        The list of sheet names to check.
    file_path : str
        Path to the Excel workbook to inspect.

    Returns
    -------
    None
    """
    # Cleanup: removed the unused local `available_sheets_in_list` and the
    # dead commented-out debug prints from the original.
    available_sheets = load(file_path)
    missing_sheets = [sheet for sheet in sheet_list if sheet not in available_sheets]
    # Any missing required sheet leaves the technology flag as-is (False
    # after the reset done by execute_checks_sheets_exist).
    if not missing_sheets:
        setattr(Technology, technology_attr, True)
| 72 |
+
|
| 73 |
+
# Check each technology's sheets
|
| 74 |
+
def execute_checks_sheets_exist(file_path):
    """Reset all technology/dump flags, then re-evaluate them for *file_path*."""
    for flag in ("gsm", "wcdma", "lte", "neighbors", "trx", "mrbts", "invunit", "mal"):
        setattr(Technology, flag, False)
    DumpType.full_dump = False
    # Check each technology's required sheet group against the workbook.
    for tech_attr, sheets in sheets_to_check.items():
        check_sheets(tech_attr, sheets, file_path)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# execute_checks_sheets_exist(
|
| 89 |
+
# r"C:\Users\David\Documents\PROJECTS\2023\PROJET 2023\DUMP\DUMP\2142\DUMP 2142.xlsb"
|
| 90 |
+
# )
|
utils/config_band.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def config_band(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the per-site configuration band string from individual cell bands.

    Parameters
    ----------
    df : pd.DataFrame
        Site information with at least the columns "code" and "band".

    Returns
    -------
    pd.DataFrame
        One row per site code with columns "code" and "site_config_band",
        where the band string is the sorted, "/"-joined set of distinct
        bands seen for that code (missing bands appear as "empty").
    """
    bands = df[["code", "band"]].copy()
    # Deduplicate on the combined "code_band" key so each (code, band) pair
    # contributes once.
    bands["ID"] = bands[["code", "band"]].astype(str).apply("_".join, axis=1)
    bands = bands.drop_duplicates(subset=["ID"])[["code", "band"]]
    bands["band"] = bands["band"].fillna("empty")
    grouped = (
        bands.groupby("code")["band"]
        .apply(lambda values: "/".join(sorted(values)))
        .reset_index()
    )
    return grouped.rename(columns={"band": "site_config_band"})
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def bcf_band(df: pd.DataFrame) -> pd.DataFrame:
    """Build the per-BCF band configuration string.

    Parameters
    ----------
    df : pd.DataFrame
        BCF-level rows holding at least the "ID_BCF" and "band" columns.

    Returns
    -------
    pd.DataFrame
        One row per "ID_BCF" with columns "ID_BCF" and "bcf_config_band",
        where the latter is the sorted, "/"-joined list of distinct bands.
    """
    pairs = df[["ID_BCF", "band"]].copy()
    # Deduplicate on the stringified (ID_BCF, band) pair.
    pairs["ID"] = pairs["ID_BCF"].astype(str) + "_" + pairs["band"].astype(str)
    pairs = pairs.drop_duplicates(subset=["ID"])[["ID_BCF", "band"]]
    pairs["band"] = pairs["band"].fillna("empty")
    result = (
        pairs.groupby("ID_BCF")["band"]
        .apply(lambda bands: "/".join(sorted(bands)))
        .reset_index()
        .rename(columns={"band": "bcf_config_band"})
    )
    return result
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def wbts_band(df: pd.DataFrame) -> pd.DataFrame:
    """Build the per-WBTS band configuration string.

    Parameters
    ----------
    df : pd.DataFrame
        WBTS-level rows holding at least the "WBTS" and "band" columns.

    Returns
    -------
    pd.DataFrame
        One row per "WBTS" with columns "WBTS" and "wbts_config_band",
        where the latter is the sorted, "/"-joined list of distinct bands.
    """
    pairs = df[["WBTS", "band"]].copy()
    # Deduplicate on the stringified (WBTS, band) pair.
    pairs["ID"] = pairs["WBTS"].astype(str) + "_" + pairs["band"].astype(str)
    pairs = pairs.drop_duplicates(subset=["ID"])[["WBTS", "band"]]
    pairs["band"] = pairs["band"].fillna("empty")
    result = (
        pairs.groupby("WBTS")["band"]
        .apply(lambda bands: "/".join(sorted(bands)))
        .reset_index()
        .rename(columns={"band": "wbts_config_band"})
    )
    return result
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def lte_mrbts_band(df: pd.DataFrame) -> pd.DataFrame:
    """Build the per-MRBTS LTE band configuration string.

    Parameters
    ----------
    df : pd.DataFrame
        MRBTS-level rows holding at least the "MRBTS" and "band" columns.

    Returns
    -------
    pd.DataFrame
        One row per "MRBTS" with columns "MRBTS" and "lte_config_band",
        where the latter is the sorted, "/"-joined list of distinct bands.
    """
    pairs = df[["MRBTS", "band"]].copy()
    # Deduplicate on the stringified (MRBTS, band) pair.
    pairs["ID"] = pairs["MRBTS"].astype(str) + "_" + pairs["band"].astype(str)
    pairs = pairs.drop_duplicates(subset=["ID"])[["MRBTS", "band"]]
    pairs["band"] = pairs["band"].fillna("empty")
    result = (
        pairs.groupby("MRBTS")["band"]
        .apply(lambda bands: "/".join(sorted(bands)))
        .reset_index()
        .rename(columns={"band": "lte_config_band"})
    )
    return result
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def adjl_band(df: pd.DataFrame, id_col: str, band_col: str) -> pd.DataFrame:
    """Build the per-ID string of bands on which ADJL neighbours exist.

    Parameters
    ----------
    df : pd.DataFrame
        ADJL rows; must contain the columns named by ``id_col`` and
        ``band_col``.
    id_col : str
        Name of the column identifying the source object.
    band_col : str
        Name of the column holding the neighbour band.

    Returns
    -------
    pd.DataFrame
        One row per ``id_col`` value with columns ``id_col`` and
        "adjl_created_band" (sorted, "/"-joined list of distinct bands;
        missing bands appear as "empty").
    """
    df_band = df[[id_col, band_col]].copy()
    # Deduplicate directly on the (id, band) pair — equivalent to the old
    # temporary string-ID column (drop_duplicates also collapses NaN pairs).
    df_band = df_band.drop_duplicates(subset=[id_col, band_col])
    df_band[band_col] = df_band[band_col].fillna("empty")
    df_band = (
        df_band.groupby(id_col)[band_col]
        .apply(lambda bands: "/".join(sorted(bands)))
        .reset_index()
        .rename(columns={band_col: "adjl_created_band"})
    )
    return df_band
|
utils/convert_to_excel.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import streamlit as st
|
| 6 |
+
|
| 7 |
+
# @st.cache_data
|
| 8 |
+
# def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
|
| 9 |
+
# # IMPORTANT: Cache the conversion to prevent computation on every rerun
|
| 10 |
+
|
| 11 |
+
# # Create a BytesIO object
|
| 12 |
+
# bytes_io = io.BytesIO()
|
| 13 |
+
|
| 14 |
+
# # Write the dataframes to the BytesIO object
|
| 15 |
+
# with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
|
| 16 |
+
# for df, sheet_name in zip(dfs, sheet_names):
|
| 17 |
+
# df.to_excel(writer, sheet_name=sheet_name, index=True)
|
| 18 |
+
|
| 19 |
+
# # Get the bytes data
|
| 20 |
+
# bytes_data = bytes_io.getvalue()
|
| 21 |
+
|
| 22 |
+
# # Close the BytesIO object
|
| 23 |
+
# bytes_io.close()
|
| 24 |
+
|
| 25 |
+
# return bytes_data
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_formats(workbook):
    """Create the named xlsxwriter cell formats used by the export sheets.

    Parameters
    ----------
    workbook : xlsxwriter.Workbook
        The workbook the formats are registered on.

    Returns
    -------
    dict
        Format name -> xlsxwriter format object. Every format is bold with
        a thin border; only the background color differs.
    """
    palette = {
        "green": "#37CC73",
        "green_light": "#87E0AB",
        "blue": "#1A64FF",
        "blue_light": "#00B0F0",
        "beurre": "#FFE699",
        "orange": "#F47F31",
        "purple5": "#E03DCD",
        "purple6": "#AE83F8",
        "gray": "#D9D9D9",
        "red": "#FF0000",
        "yellow": "#FFFF00",
    }
    return {
        name: workbook.add_format({"bg_color": color, "bold": True, "border": 1})
        for name, color in palette.items()
    }
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def get_format_map_by_format_type(formats: dict, format_type: str) -> dict:
    """Map DataFrame column names to header cell formats for a sheet type.

    Parameters
    ----------
    formats : dict
        Named formats as produced by :func:`get_formats`.
    format_type : str
        One of "GSM_Analysis", "database", "LTE_Analysis", "WCEL_capacity"
        or "invunit". Any other value yields an empty map (no styling).

    Returns
    -------
    dict
        ``{column_name: xlsxwriter_format}`` for the sheet's header row.
    """

    def by_color(groups: dict) -> dict:
        # Expand {color_name: [columns]} into {column: color_name}.
        spec = {}
        for color, columns in groups.items():
            spec.update(dict.fromkeys(columns, color))
        return spec

    # Inventory-unit columns are shared between the "invunit" and "database"
    # sheets, so declare them once instead of duplicating the whole map.
    invunit_spec = by_color(
        {
            "blue": ["code"],
            "blue_light": ["FBBA", "FBBC", "FSMF", "ABIA", "total_number_of_subunit"],
            "beurre": [
                "AHDA", "AHEGB", "AHEGC", "AHEGHA", "AHGA", "AHMA", "AHPMDA",
                "AHPMDG", "AHPMDI", "ARDA", "AREA", "ARGA", "ARMA", "AZNA",
                "FHDB", "FHEB", "FHEL", "FRGU", "FRGY", "FRMB", "FRMF",
                "FXDB", "FXED", "FZNI",
            ],
        }
    )

    spec_by_type = {
        "GSM_Analysis": by_color(
            {
                "beurre": [
                    "amrSegLoadDepTchRateLower", "amrSegLoadDepTchRateUpper",
                    "btsSpLoadDepTchRateLower", "btsSpLoadDepTchRateUpper",
                    "amrWbFrCodecModeSet", "dedicatedGPRScapacity",
                    "defaultGPRScapacity",
                    # Operational Neighbours Distance sheet
                    "Distance_km",
                ],
                "blue": [
                    "number_trx_per_cell", "number_trx_per_bcf",
                    "number_tch_per_cell", "number_sd_per_cell",
                    "number_bcch_per_cell", "number_ccch_per_cell",
                    "number_cbc_per_cell", "number_total_channels_per_cell",
                    "number_signals_per_cell",
                    # Operational Neighbours Distance sheet (source side)
                    "Source_ID_BTS", "Source_name",
                    "Source_BH Congestion status", "Source_Longitude",
                    "Source_Latitude",
                ],
                "purple5": ["hf_rate_coef", "GPRS"],
                "green": [
                    "TCH Actual HR%", "Offered Traffic BH",
                    "Max_Traffic BH", "Avg_Traffic BH",
                ],
                "red": ["TCH UTILIZATION (@Max Traffic)"],
                "purple6": [
                    "ErlabngB_value", "Target FR CHs", "Target HR CHs",
                    "Target TCHs", "Target TRXs", "Number of required TRXs",
                ],
                "yellow": [
                    "max_tch_call_blocking_bh", "avg_tch_call_blocking_bh",
                    "number_of_days_with_tch_blocking_exceeded_bh",
                    "max_sdcch_real_blocking_bh", "avg_sdcch_real_blocking_bh",
                    "number_of_days_with_sdcch_blocking_exceeded_bh",
                    "Average_cell_availability_bh",
                    "number_of_days_exceeding_availability_threshold_bh",
                    "max_tch_abis_fail_bh", "avg_tch_abis_fail_bh",
                    "number_of_days_with_tch_abis_fail_exceeded_bh",
                ],
                "orange": [
                    "Tch utilization comments", "tch_call_blocking_bh_comment",
                    "sdcch_real_blocking_bh_comment", "availability_comment_bh",
                    "tch_abis_fail_bh_comment", "availability_comment_daily",
                    "tch_abis_fail_daily_comment",
                ],
                "green_light": [
                    "Average_cell_availability_daily",
                    "number_of_days_exceeding_availability_threshold_daily",
                    "max_tch_abis_fail_daily", "avg_tch_abis_fail_daily",
                    "number_of_days_with_tch_abis_fail_exceeded_daily",
                    # Operational Neighbours Distance sheet (neighbour side)
                    "Neighbour_ID_BTS", "Neighbour_name",
                    "Neighbour_operational_comment", "Neighbour_Longitude",
                    "Neighbour_Latitude",
                ],
                "gray": [
                    "BH Congestion status", "operational_comment",
                    "Final comment", "Final comment summary",
                ],
            }
        ),
        "database": {
            **invunit_spec,
            **by_color(
                {
                    "green": [
                        "Azimut", "Longitude", "Latitude", "Hauteur",
                        "City", "Adresse", "Commune", "Cercle",
                    ],
                    "blue_light": [
                        "number_trx_per_cell", "number_trx_per_bcf",
                        "number_trx_per_site",
                    ],
                }
            ),
        },
        "LTE_Analysis": by_color(
            {
                "blue": [
                    "code", "code_sector", "Region", "site_config_band",
                    "Longitude", "Latitude",
                ],
                "beurre": [
                    "prb_l800", "prb_l1800", "prb_l2300", "prb_l2600",
                    "prb_l1800s",
                ],
                "purple5": [
                    "prb_l800_2nd", "prb_l1800_2nd", "prb_l2300_2nd",
                    "prb_l2600_2nd", "prb_l1800s_2nd",
                ],
                "purple6": [
                    "act_ues_l800", "act_ues_l1800", "act_ues_l2300",
                    "act_ues_l2600", "act_ues_l1800s",
                ],
                "blue_light": [
                    "dl_thp_l800", "dl_thp_l1800", "dl_thp_l2300",
                    "dl_thp_l2600", "dl_thp_l1800s",
                ],
                "gray": [
                    "ul_thp_l800", "ul_thp_l1800", "ul_thp_l2300",
                    "ul_thp_l2600", "ul_thp_l1800s",
                ],
                "orange": [
                    "num_congested_cells", "num_cells", "num_cell_with_kpi",
                    "num_down_or_no_kpi_cells", "prb_diff_between_cells",
                    "load_balance_required", "congestion_comment",
                ],
                "green": ["final_comments"],
            }
        ),
        "WCEL_capacity": by_color(
            {
                "blue": ["code", "Region", "name"],
                "blue_light": [
                    "Avg_availability", "Avail_exceed_days",
                    "availability_comment",
                ],
                "beurre": [
                    "sum_traffic_cs", "sum_traffic_dl", "max_dl_throughput",
                    "avg_dl_throughput", "max_users",
                ],
                "purple5": [
                    "max_iub_frameloss", "iub_frameloss_exceed_days",
                    "max_hsdpa_congestion_rate_iub", "hsdpa_iub_exceed_days",
                ],
                "purple6": [
                    "max_rrc_fail_ac", "ac_fail_exceed_days",
                    "max_rrc_fail_ac_ul", "ac_ul_fail_exceed_days",
                    "max_rrc_fail_ac_dl", "ac_dl_fail_exceed_days",
                    "max_rrc_fail_code", "code_fail_exceed_days",
                ],
                "yellow": ["max_rrc_fail_bts", "bts_fail_exceed_days"],
                "green": [
                    "tx_congestion_comments", "operational_comments",
                    "fails_comments", "final_comments",
                ],
            }
        ),
        "invunit": invunit_spec,
    }

    spec = spec_by_type.get(format_type, {})  # unknown type -> no formatting
    return {column: formats[color] for column, color in spec.items()}
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def _apply_custom_formatting(
    writer, df: pd.DataFrame, sheet_name: str, format_type: str
):
    """Re-write header cells of *sheet_name* with per-column colors.

    Columns without an entry in the format map keep the default header
    written by ``DataFrame.to_excel``.

    NOTE(review): column 0 is skipped, which assumes the sheet was written
    with ``index=True`` (header cells shifted right by one) — confirm if
    ``index=False`` is ever used with formatting.
    """
    workbook = writer.book
    worksheet = writer.sheets[sheet_name]

    format_map = get_format_map_by_format_type(get_formats(workbook), format_type)

    for position, column in enumerate(df.columns, start=1):
        cell_format = format_map.get(column)
        if cell_format is not None:
            worksheet.write(0, position, column, cell_format)
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def _write_to_excel(
    dfs: list[pd.DataFrame], sheet_names: list[str], index=True, format_type: str = None
) -> bytes:
    """Serialize dataframes into one xlsx workbook, one sheet per name.

    Parameters
    ----------
    dfs : list[pd.DataFrame]
        Dataframes to write, paired positionally with ``sheet_names``.
    sheet_names : list[str]
        Sheet name for each dataframe.
    index : bool
        Whether to write the dataframe index as the first column.
    format_type : str, optional
        When given, header cells are colored via
        :func:`_apply_custom_formatting` for this sheet family.

    Returns
    -------
    bytes
        The finished workbook as raw xlsx bytes.
    """
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        for frame, sheet in zip(dfs, sheet_names):
            frame.to_excel(writer, sheet_name=sheet, index=index)
            if format_type:
                _apply_custom_formatting(writer, frame, sheet, format_type)
    return buffer.getvalue()
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
@st.cache_data
def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
    """Convert dataframes to plain xlsx bytes (no header coloring).

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(dfs=dfs, sheet_names=sheet_names, index=True)
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
@st.cache_data
def convert_gsm_dfs(dfs, sheet_names) -> bytes:
    """Convert dataframes to xlsx bytes with GSM-analysis header coloring.

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(
        dfs=dfs, sheet_names=sheet_names, index=True, format_type="GSM_Analysis"
    )
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
@st.cache_data
def convert_lte_analysis_dfs(dfs, sheet_names) -> bytes:
    """Convert dataframes to xlsx bytes with LTE-analysis header coloring.

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(
        dfs=dfs, sheet_names=sheet_names, index=True, format_type="LTE_Analysis"
    )
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
@st.cache_data
def convert_wcel_capacity_dfs(dfs, sheet_names) -> bytes:
    """Convert dataframes to xlsx bytes with WCEL-capacity header coloring.

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(
        dfs=dfs, sheet_names=sheet_names, index=True, format_type="WCEL_capacity"
    )
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
@st.cache_data
def convert_database_dfs(dfs, sheet_names) -> bytes:
    """Convert dataframes to xlsx bytes with database-sheet header coloring.

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(
        dfs=dfs, sheet_names=sheet_names, index=True, format_type="database"
    )
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
@st.cache_data
def convert_invunit_dfs(dfs, sheet_names) -> bytes:
    """Convert dataframes to xlsx bytes with inventory-unit header coloring.

    Cached by Streamlit so reruns do not repeat the conversion.
    """
    return _write_to_excel(
        dfs=dfs, sheet_names=sheet_names, index=True, format_type="invunit"
    )
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def save_dataframe(df: pd.DataFrame, sheet_name: str):
    """Persist *df* as a timestamped CSV under the local ``data2`` directory.

    Args:
        df (pd.DataFrame): The dataframe to save.
        sheet_name (str): Logical sheet name, used as the file-name prefix.
    """
    from pathlib import Path

    target_dir = Path("data2")
    # Fix: creating the directory avoids FileNotFoundError on a fresh checkout.
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / f"{sheet_name}_{time.time()}.csv"
    # latin1 kept as-is — presumably matched to downstream readers; TODO confirm.
    df.to_csv(target, index=False, encoding="latin1")
|
utils/extract_code.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def extract_code_from_mrbts(mrbts):
    """Extract the bare site code from an MRBTS identifier.

    Args:
        mrbts (int or str): MRBTS identifier, possibly carrying a numeric
            prefix ("10", or a single leading "1"/"2"/"3") before the code.

    Returns:
        int: The site code with the prefix stripped, or the whole value as
        an int when no known prefix applies.

    Notes:
        A "10" prefix is only stripped from identifiers longer than 5
        digits; a single-digit "1"/"2"/"3" prefix only from identifiers
        longer than 4 digits (schemes like 10000/20000/30000 + code).
    """
    text = str(mrbts)

    if len(text) > 5 and text.startswith("10"):
        # '10' + code: drop the two-digit prefix
        return int(text[2:])
    if len(text) > 4 and text.startswith(("1", "2", "3")):
        # Single-digit prefix; the three original identical branches collapsed.
        return int(text[1:])
    # Default: no recognized prefix, the whole value is the code.
    return int(text)
|
utils/kml_creator.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import math
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import simplekml
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def create_sector(kml: simplekml.Kml, row, arc_angle=65):
    """Draw one antenna sector as a filled KML polygon with full details.

    Args:
        kml (simplekml.Kml): Document the polygon is added to.
        row: DataFrame row with at least "name", "Azimut", "Longitude",
            "Latitude", "size" (radius in meters) and "color" (KML aabbggrr
            string); every column is echoed into the placemark description.
        arc_angle (int): Angular width of the sector in degrees.
    """
    name = row["name"]
    azimuth = row["Azimut"]
    lon = row["Longitude"]
    lat = row["Latitude"]
    size = row["size"]

    num_points = 20  # points along the arc for a smooth outline
    half_width = arc_angle / 2

    # Meters per degree of latitude is ~constant; per degree of longitude it
    # shrinks with latitude. Fix: the original used 111320 for both axes,
    # which stretches sectors east-west away from the equator.
    meters_per_deg_lat = 111320.0
    meters_per_deg_lon = meters_per_deg_lat * math.cos(math.radians(lat))

    coords = [(lon, lat)]  # start at the site (polygon apex)
    for angle in np.linspace(azimuth - half_width, azimuth + half_width, num_points):
        angle_rad = math.radians(angle)
        coords.append(
            (
                lon + (size / meters_per_deg_lon) * math.sin(angle_rad),
                lat + (size / meters_per_deg_lat) * math.cos(angle_rad),
            )
        )
    coords.append((lon, lat))  # close the polygon

    pol = kml.newpolygon(name=name, outerboundaryis=coords)

    # Echo every column of the row into the balloon description.
    description = "<b>Sector Details:</b><br>"
    for column, value in row.items():
        description += f"<b>{column}:</b> {value}<br>"
    pol.description = description

    pol.style.polystyle.color = row["color"]
    pol.style.polystyle.outline = 1
    pol.style.linestyle.color = "ff000000"  # black outline
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def generate_kml_from_df(df: pd.DataFrame):
    """Build a KML document of telecom sectors from a DataFrame.

    Each row becomes one sector polygon (via :func:`create_sector`); each
    distinct site code additionally gets a single labelled point placemark.

    Returns
    -------
    io.BytesIO
        The serialized KML, positioned at offset 0 and ready to stream.
    """
    kml = simplekml.Kml()
    sites_done = set()  # site codes whose point placemark is already drawn

    # Larger radii first so the smaller (e.g. 900 MHz) sectors render on top.
    ordered = df.sort_values(by="size", ascending=False)

    for _, sector in ordered.iterrows():
        site = sector["code"]
        lon = sector["Longitude"]
        lat = sector["Latitude"]

        if site not in sites_done:
            sites_done.add(site)
            point = kml.newpoint(name=site, coords=[(lon, lat)])
            point.style.iconstyle.icon.href = (
                "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
            )
            point.style.labelstyle.scale = 1.2  # slightly larger label
            point.description = f"Site: {site}<br>Location: {lat}, {lon}"

        create_sector(kml, sector)

    output = io.BytesIO()
    output.write(kml.kml().encode("utf-8"))
    output.seek(0)
    return output
|
utils/kpi_analysis_utils.py
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class GsmAnalysis:
    """Static lookup tables used by the GSM traffic/capacity analysis."""

    # Half-rate usage coefficients: maps a configured HR percentage to an
    # effective TCH capacity multiplier.
    # NOTE(review): 100 -> 1.4 breaks the monotonic progression (99 -> 2.0);
    # presumably a distinct dimensioning rule — confirm with the KPI owner.
    hf_rate_coef = {
        10: 1.1,
        20: 1.2,
        40: 1.4,
        60: 1.6,
        70: 1.7,
        80: 1.8,
        99: 2.0,
        100: 1.4,
    }
    # Erlang B lookup: number of TCH channels (1..200) -> offered traffic in
    # Erlangs. Values appear consistent with a ~2% grade of service
    # (e.g. 1 ch -> 0.0204 E) — TODO confirm the blocking target.
    erlangB_table = {
        1: 0.0204, 2: 0.2234, 3: 0.6022, 4: 1.092, 5: 1.657, 6: 2.276,
        7: 2.935, 8: 3.627, 9: 4.345, 10: 5.084, 11: 5.841, 12: 6.614,
        13: 7.401, 14: 8.2, 15: 9.009, 16: 9.828, 17: 10.66, 18: 11.49,
        19: 12.33, 20: 13.18, 21: 14.04, 22: 14.9, 23: 15.76, 24: 16.63,
        25: 17.5, 26: 18.38, 27: 19.26, 28: 20.15, 29: 21.04, 30: 21.93,
        31: 22.83, 32: 23.72, 33: 24.63, 34: 25.53, 35: 26.43, 36: 27.34,
        37: 28.25, 38: 29.17, 39: 30.08, 40: 31, 41: 31.91, 42: 32.84,
        43: 33.76, 44: 34.68, 45: 35.61, 46: 36.53, 47: 37.46, 48: 38.39,
        49: 39.32, 50: 40.25, 51: 41.19, 52: 42.12, 53: 43.06, 54: 44,
        55: 44.93, 56: 45.88, 57: 46.81, 58: 47.75, 59: 48.7, 60: 49.64,
        61: 50.59, 62: 51.53, 63: 52.48, 64: 53.43, 65: 54.38, 66: 55.32,
        67: 56.27, 68: 57.22, 69: 58.18, 70: 59.13, 71: 60.08, 72: 61.04,
        73: 61.99, 74: 62.94, 75: 63.9, 76: 64.86, 77: 65.81, 78: 66.77,
        79: 67.73, 80: 68.69, 81: 69.64, 82: 70.61, 83: 71.57, 84: 72.53,
        85: 73.49, 86: 74.45, 87: 75.41, 88: 76.38, 89: 77.34, 90: 78.3,
        91: 79.27, 92: 80.23, 93: 81.2, 94: 82.16, 95: 83.13, 96: 84.09,
        97: 85.06, 98: 86.03, 99: 87, 100: 87.97, 101: 88.94, 102: 89.91,
        103: 90.88, 104: 91.85, 105: 92.82, 106: 93.79, 107: 94.76,
        108: 95.73, 109: 96.71, 110: 97.68, 111: 98.65, 112: 99.63,
        113: 100.6, 114: 101.57, 115: 102.54, 116: 103.52, 117: 104.49,
        118: 105.47, 119: 106.44, 120: 107.42, 121: 108.4, 122: 109.37,
        123: 110.35, 124: 111.32, 125: 112.3, 126: 113.28, 127: 114.25,
        128: 115.23, 129: 116.21, 130: 117.19, 131: 118.17, 132: 119.15,
        133: 120.12, 134: 121.1, 135: 122.08, 136: 123.07, 137: 124.04,
        138: 125.02,
        # Entries 139-150 carry more decimal places than the rest of the
        # table — likely produced by a different source/tool.
        139: 126.01341, 140: 127.00918, 141: 127.96752, 142: 128.98152,
        143: 129.92152, 144: 130.88534, 145: 131.96461, 146: 132.89897,
        147: 133.86373, 148: 134.82569, 149: 135.76295, 150: 136.82988,
        151: 137.79, 152: 138.77, 153: 139.75, 154: 140.74, 155: 141.72,
        156: 142.7, 157: 143.69, 158: 144.67, 159: 145.66, 160: 146.64,
        161: 147.63, 162: 148.61, 163: 149.6, 164: 150.58, 165: 151.57,
        166: 152.55, 167: 153.54, 168: 154.53, 169: 155.51, 170: 156.5,
        171: 157.48, 172: 158.47, 173: 159.46, 174: 160.44, 175: 161.43,
        176: 162.42, 177: 163.41, 178: 164.39, 179: 165.38, 180: 166.37,
        181: 167.36, 182: 168.35, 183: 169.33, 184: 170.32, 185: 171.31,
        186: 172.3, 187: 173.29, 188: 174.28, 189: 175.27, 190: 176.26,
        191: 177.25, 192: 178.24, 193: 179.23, 194: 180.22, 195: 181.21,
        196: 182.2, 197: 183.19, 198: 184.18, 199: 185.17, 200: 186.16,
    }
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class GsmCapacity:
    """Holds GSM capacity analysis state and the final-comment classification map."""

    # Populated elsewhere with the combined per-cell analysis results.
    final_results = None
    # Populated elsewhere with the operational-neighbours DataFrame.
    operational_neighbours_df = None
    # Maps the detailed, comma-joined per-KPI comment string (built from the
    # individual availability / TX / blocking / utilization comments) to a
    # single summarized category for reporting. Keys must match the joined
    # comment strings exactly.
    final_comment_mapping = {
        "Availability and TX issues": "Operational issues with no congestion",
        "Availability issues": "Operational issues with no congestion",
        "TX issues": "Operational issues with no congestion",
        "Operational is OK": "Operational is OK with no congestion",
        "Tch utilization exceeded threshold, Availability and TX issues": "High utilization with Operational issues",
        "Tch utilization exceeded threshold, Availability issues": "High utilization with Operational issues",
        "Tch utilization exceeded threshold, TX issues": "High utilization with Operational issues",
        "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
        # NOTE(review): this "TX issues" combination maps to the
        # "without Operational issues" bucket unlike its siblings — confirm.
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High Utilization with Congestion without Operational issues",
        "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
        "Tch utilization exceeded threshold, TCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
        "Down Site": "Down Cell",
        "SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
        "TCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
        "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
        "Tch utilization exceeded threshold, Operational is OK": "High utilization without Congestion and Operational issues",
        "SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
        "SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
        "SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
        "TCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
    }
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
    """
    Combine comments from multiple columns into one comma-separated column.

    Empty strings, ``None`` and NaN entries are skipped. (The previous
    implementation filtered only on truthiness, and since ``float('nan')``
    is truthy, NaN cells leaked into the output as the literal string "nan".)

    Args:
        df: DataFrame containing comment columns
        *columns: Variable number of column names containing comments
        new_column: Name for the new combined comments column

    Returns:
        Copy of ``df`` with ``new_column`` holding the combined comments.
    """
    result_df = df.copy()
    result_df[new_column] = result_df[list(columns)].apply(
        # pd.notna drops NaN/None; "and x" drops empty strings / zeros.
        lambda row: ", ".join(str(x) for x in row if pd.notna(x) and x),
        axis=1,
    )
    # Trim leading/trailing commas and whitespace left by empty fields.
    result_df[new_column] = result_df[new_column].str.replace(
        r"^[,\s]+|[,\s]+$", "", regex=True
    )
    # Collapse consecutive commas produced by empty middle fields.
    result_df[new_column] = result_df[new_column].str.replace(
        r",\s*,", ", ", regex=True
    )
    return result_df
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def summarize_fails_comments(comment):
    """Condense ``rrc_fail_*`` markers in a comment into a sorted summary string.

    Returns "" for blank/missing comments or when no markers are present;
    otherwise the de-duplicated, alphabetically sorted fail codes joined by
    commas and suffixed with the word "fails".
    """
    # Blank or missing comments summarize to the empty string.
    if not comment or pd.isna(comment) or comment.strip() == "":
        return ""

    fail_codes = re.findall(r"rrc_fail_([a-z_]+)", comment)
    if not fail_codes:
        return ""

    # De-duplicate, order alphabetically, tag with "fails".
    return "{} fails".format(", ".join(sorted(set(fail_codes))))
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean KPI column names by replacing special characters and standardizing format.

    Spaces and punctuation become underscores, runs of underscores collapse to
    one, ``%`` becomes ``perc`` and trailing underscores are stripped.

    Args:
        df: DataFrame with KPI column names to clean

    Returns:
        DataFrame with cleaned column names
    """
    name_df: pd.DataFrame = df.copy()
    # '-' is escaped explicitly: the old class "[ /(),-.']" only matched '-'
    # because ','..'.' happened to form a range containing it.
    name_df.columns = name_df.columns.str.replace(r"[ /(),\-.']", "_", regex=True)
    # Collapse any run of underscores in a single pass. The previous
    # "___" then "__" literal replacements left "__" behind for runs of 5+.
    name_df.columns = name_df.columns.str.replace(r"_{2,}", "_", regex=True)
    name_df.columns = name_df.columns.str.replace("%", "perc", regex=False)
    name_df.columns = name_df.columns.str.rstrip("_")
    return name_df
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Derive a ``date`` column (``YYYY-MM-DD``) from ``PERIOD_START_TIME``
    (formatted ``MM.DD.YYYY``).

    Args:
        df: DataFrame containing a ``PERIOD_START_TIME`` column

    Returns:
        Copy of ``df`` with ``PERIOD_START_TIME`` replaced by ``date``.
    """
    out = df.copy()
    # Split MM.DD.YYYY into its three components.
    parts = out["PERIOD_START_TIME"].str.split(".", expand=True)
    out = out.drop(columns=["PERIOD_START_TIME"])
    # Reassemble as ISO-style year-month-day.
    out["date"] = parts[2] + "-" + parts[0] + "-" + parts[1]
    return out
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def create_hourly_date(df: pd.DataFrame) -> pd.DataFrame:
    """
    Build ``datetime``, ``date`` and ``hour`` columns from ``PERIOD_START_TIME``
    (formatted ``MM.DD.YYYY HH:MM``).

    Args:
        df: DataFrame containing a ``PERIOD_START_TIME`` column

    Returns:
        Copy of ``df`` with ``datetime``/``date``/``hour`` columns added and
        the source/helper columns removed.
    """
    # Work on a copy: the previous implementation assigned helper columns
    # directly onto the caller's frame (unlike create_daily_date).
    date_df: pd.DataFrame = df.copy()
    date_df[["date_t", "hour"]] = date_df["PERIOD_START_TIME"].str.split(
        " ", expand=True
    )
    date_df[["mois", "jour", "annee"]] = date_df["date_t"].str.split(".", expand=True)
    date_df["datetime"] = (
        date_df["annee"]
        + "-"
        + date_df["mois"]
        + "-"
        + date_df["jour"]
        + " "
        + date_df["hour"]
    )
    date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
    # Drop the intermediate split columns and the original timestamp.
    return date_df.drop(
        ["annee", "mois", "jour", "date_t", "PERIOD_START_TIME"], axis=1
    )
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def create_dfs_per_kpi(
    df: pd.DataFrame = None,
    pivot_date_column: str = "date",
    pivot_name_column: str = "BTS_name",
    kpi_columns_from: int = None,
) -> pd.DataFrame:
    """
    Build one pivoted DataFrame per KPI column.

    Args:
        df: DataFrame containing KPI data
        pivot_date_column: Column used as the pivot's columns axis
        pivot_name_column: Column used as the pivot's index
        kpi_columns_from: Position of the first KPI column in ``df``

    Returns:
        Dict mapping each KPI name to its pivoted DataFrame, whose columns
        form a ``(KPI, Date)`` MultiIndex.
    """
    pivoted_kpi_dfs = {}

    # Everything from kpi_columns_from onwards is treated as a KPI column.
    for kpi_name in df.columns[kpi_columns_from:]:
        subset = (
            df[[pivot_date_column, pivot_name_column, kpi_name]]
            .copy()
            # Keep one row per (cell, date); pivot() would raise on duplicates.
            .drop_duplicates(
                subset=[pivot_name_column, pivot_date_column], keep="first"
            )
            .reset_index()
        )
        wide = subset.pivot(
            index=pivot_name_column, columns=pivot_date_column, values=kpi_name
        )
        # Prefix every date column with the KPI name via a MultiIndex.
        wide.columns = pd.MultiIndex.from_product([[kpi_name], wide.columns])
        wide.columns.names = ["KPI", "Date"]
        pivoted_kpi_dfs[kpi_name] = wide

    return pivoted_kpi_dfs
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
def cell_availability_analysis(
    df: pd.DataFrame,
    days: int = 7,
    availability_threshold: int = 95,
    analysis_type: str = "daily",
) -> pd.DataFrame:
    """
    Analyze cell availability and categorize cells from their average availability.

    Args:
        df: DataFrame whose last ``days`` columns hold availability values
        days: Number of trailing columns to analyze
        availability_threshold: Availability percentage considered acceptable
        analysis_type: Label ("daily"/"hourly") appended to result column names

    Returns:
        DataFrame with average availability, count of days at/below the
        threshold, and a status comment per row.
    """
    label = analysis_type.lower()
    result_df: pd.DataFrame = df.copy().fillna(0)
    window = result_df.iloc[:, -days:]

    result_df[f"Average_cell_availability_{label}"] = window.mean(axis=1).round(2)

    # A day counts as "exceeding" when availability is at or below threshold.
    result_df[f"number_of_days_exceeding_availability_threshold_{label}"] = (
        window.apply(
            lambda row: sum(1 for value in row if value <= availability_threshold),
            axis=1,
        )
    )

    def categorize_availability(avg: float) -> str:
        # 0/missing -> down; (0,70] -> critical; (70,threshold] -> unstable.
        if avg == 0 or pd.isnull(avg):
            return "Down Site"
        if 0 < avg <= 70:
            return "critical instability"
        if 70 < avg <= availability_threshold:
            return "instability"
        return "Availability OK"

    result_df[f"availability_comment_{label}"] = result_df[
        f"Average_cell_availability_{label}"
    ].apply(categorize_availability)

    return result_df
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
def analyze_tch_abis_fails(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    analysis_type: str,
    number_of_threshold_days: int,
    tch_abis_fails_threshold: int,
) -> pd.DataFrame:
    """
    Summarize TCH Abis failures over the trailing KPI window.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        analysis_type: Label appended to the generated column names
        number_of_threshold_days: Minimum breach-days needed to flag the cell
        tch_abis_fails_threshold: Daily value counted as a breach (>=)

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    label = analysis_type.lower()
    result_df: pd.DataFrame = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_tch_abis_fail_{label}"] = window.mean(axis=1).round(2)
    result_df[f"max_tch_abis_fail_{label}"] = window.max(axis=1)

    # Count days at or above the failure threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= tch_abis_fails_threshold),
        axis=1,
    )
    result_df[f"number_of_days_with_tch_abis_fail_exceeded_{label}"] = breach_days

    # Flag only cells breaching on enough days; the rest get no comment.
    result_df[f"tch_abis_fail_{label}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        "tch abis fail exceeded threshold",
        None,
    )
    return result_df
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def analyze_tch_call_blocking(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    analysis_type: str,
    number_of_threshold_days: int,
    tch_blocking_threshold: int,
) -> pd.DataFrame:
    """
    Summarize TCH call blocking over the trailing KPI window.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        analysis_type: Label appended to the generated column names
        number_of_threshold_days: Minimum breach-days needed to flag the cell
        tch_blocking_threshold: Daily value counted as a breach (>=)

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    label = analysis_type.lower()
    result_df = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_tch_call_blocking_{label}"] = window.mean(axis=1).round(2)
    result_df[f"max_tch_call_blocking_{label}"] = window.max(axis=1)

    # Count days at or above the blocking threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= tch_blocking_threshold),
        axis=1,
    )
    result_df[f"number_of_days_with_tch_blocking_exceeded_{label}"] = breach_days

    # Flag only cells breaching on enough days; the rest get no comment.
    result_df[f"tch_call_blocking_{label}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        "TCH blocking exceeded threshold",
        None,
    )
    return result_df
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
def analyze_sdcch_call_blocking(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    sdcch_blocking_threshold: int,
    analysis_type: str,
    number_of_threshold_days: int,
) -> pd.DataFrame:
    """
    Summarize SDCCH blocking over the trailing KPI window.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        sdcch_blocking_threshold: Daily value counted as a breach (>=)
        analysis_type: Label appended to the generated column names
        number_of_threshold_days: Minimum breach-days needed to flag the cell

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    label = analysis_type.lower()
    result_df = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_sdcch_real_blocking_{label}"] = window.mean(axis=1).round(2)
    result_df[f"max_sdcch_real_blocking_{label}"] = window.max(axis=1)

    # Count days at or above the blocking threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= sdcch_blocking_threshold),
        axis=1,
    )
    result_df[f"number_of_days_with_sdcch_blocking_exceeded_{label}"] = breach_days

    # Flag only cells breaching on enough days; the rest get no comment.
    result_df[f"sdcch_real_blocking_{label}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        "SDCCH blocking exceeded threshold",
        None,
    )
    return result_df
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
class LteCapacity:
    """Holds LTE capacity analysis state and band-upgrade recommendations."""

    # Populated elsewhere with the combined analysis results.
    final_results = None
    # Maps a cell's current LTE band combination (slash-joined, sorted) to
    # the recommended next capacity step: an additional carrier, a new
    # site / dual-beam solution, or a colocated alternative.
    next_band_mapping = {
        "L1800": "L800",
        "L800": "L1800",
        "L1800/L800": "L2600",
        "L1800/L2300/L800": "L2600",
        "L2300/L800": "L2600",
        "L1800/L2600/L800": "New site/Dual Beam",
        "L1800/L2300/L2600/L800": "New site/Dual Beam",
        "L2300": "FDD H// colocated site",
    }
|
| 574 |
+
|
| 575 |
+
|
| 576 |
+
def analyze_prb_usage(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    prb_usage_threshold: int,
    analysis_type: str,
    number_of_threshold_days: int,
    suffix: str = "",
) -> pd.DataFrame:
    """
    Summarize PRB usage over the trailing KPI window.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        prb_usage_threshold: Daily value counted as a breach (>=)
        analysis_type: Label appended to the generated column names
        number_of_threshold_days: Minimum breach-days needed to flag the cell
        suffix: Extra tag appended after the label (e.g. per-direction)

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    tag = f"{analysis_type.lower()}{suffix}"
    result_df = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_prb_usage_{tag}"] = window.mean(axis=1).round(2)
    result_df[f"max_prb_usage_{tag}"] = window.max(axis=1)

    # Count days at or above the usage threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= prb_usage_threshold),
        axis=1,
    )
    result_df[f"number_of_days_with_prb_usage_exceeded_{tag}"] = breach_days

    # Flag only cells breaching on enough days; the rest get no comment.
    result_df[f"prb_usage_{tag}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        "PRB usage exceeded threshold",
        None,
    )
    return result_df
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
def analyze_fails_kpi(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    number_of_threshold_days: int,
    kpi_threshold: int,
    kpi_column_name: str,
) -> pd.DataFrame:
    """
    Summarize a failure KPI over the trailing window.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        number_of_threshold_days: Minimum breach-days needed to flag the cell
        kpi_threshold: Daily value counted as a breach (>=)
        kpi_column_name: Base name used for the generated columns

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    result_df: pd.DataFrame = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_{kpi_column_name}"] = window.mean(axis=1).round(2)
    result_df[f"max_{kpi_column_name}"] = window.max(axis=1)

    # Count days at or above the KPI threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= kpi_threshold), axis=1
    )
    result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = breach_days

    # Flag only cells breaching on enough days; the rest get no comment.
    result_df[f"{kpi_column_name}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        f"{kpi_column_name} exceeded threshold",
        None,
    )
    return result_df
|
| 639 |
+
|
| 640 |
+
|
| 641 |
+
def analyze_lcg_utilization(
    df: pd.DataFrame,
    number_of_kpi_days: int,
    number_of_threshold_days: int,
    kpi_threshold: int,
    kpi_column_name: str,
) -> pd.DataFrame:
    """
    Summarize LCG utilization over the trailing window.

    Same contract as ``analyze_fails_kpi``: adds avg/max columns, a count of
    days at/above ``kpi_threshold`` and a comment when that count reaches
    ``number_of_threshold_days``.

    Args:
        df: DataFrame whose last ``number_of_kpi_days`` columns hold daily values
        number_of_kpi_days: Size of the trailing window
        number_of_threshold_days: Minimum breach-days needed to flag the LCG
        kpi_threshold: Daily value counted as a breach (>=)
        kpi_column_name: Base name used for the generated columns

    Returns:
        DataFrame with avg/max, breach-day count and an optional comment.
    """
    result_df: pd.DataFrame = df.copy()
    window = result_df.iloc[:, -number_of_kpi_days:]

    result_df[f"avg_{kpi_column_name}"] = window.mean(axis=1).round(2)
    result_df[f"max_{kpi_column_name}"] = window.max(axis=1)

    # Count days at or above the utilization threshold.
    breach_days = window.apply(
        lambda row: sum(1 for value in row if value >= kpi_threshold), axis=1
    )
    result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = breach_days

    # Flag only rows breaching on enough days; the rest get no comment.
    result_df[f"{kpi_column_name}_comment"] = np.where(
        breach_days >= number_of_threshold_days,
        f"{kpi_column_name} exceeded threshold",
        None,
    )
    return result_df
|
utils/rnc_bsc_lac_count_chart.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import plotly.express as px
|
| 3 |
+
import plotly.graph_objects as go
|
| 4 |
+
from plotly.subplots import make_subplots
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Reusable function to create subplots
|
| 8 |
+
def create_lac_count_per_controller_subplots(
    df: pd.DataFrame,
    controller_column: str,
    lac_column: str,
    count_column: str,
    fig_title: str,
):
    """
    Build a grid of bar-chart subplots (4 per row), one per controller,
    showing the count per LAC for that controller.

    Args:
        df: DataFrame with one row per (controller, LAC) pair
        controller_column: Column holding the controller identifier
        lac_column: Column holding the LAC identifier (x axis)
        count_column: Column holding the value to plot (y axis and bar label)
        fig_title: Overall figure title

    Returns:
        The assembled plotly Figure (not shown).
    """
    # One subplot per distinct controller, titled with its identifier.
    unique_controllers = df[controller_column].unique()

    # Ceiling division: 4 subplots per row.
    rows_needed = (len(unique_controllers) + 3) // 4  # Round up to ensure enough rows

    fig = make_subplots(
        rows=rows_needed,
        cols=4,
        shared_xaxes=False,
        subplot_titles=unique_controllers,
    )

    # 1-based running position, converted to (row, col) below.
    subplot_position = 1

    for controller in unique_controllers:
        # Rows belonging to this controller only.
        controller_data = df[df[controller_column] == controller]

        # Map the linear position onto the 4-column grid (both 1-based).
        row = (subplot_position - 1) // 4 + 1
        col = (subplot_position - 1) % 4 + 1

        fig.add_trace(
            go.Bar(
                x=controller_data[lac_column],
                y=controller_data[count_column],
                name=controller,
                # Show the count value on each bar.
                text=controller_data[count_column],
            ),
            row=row,
            col=col,
        )

        subplot_position += 1

    # Scale the figure height with the number of rows; per-subplot titles
    # make the legend redundant.
    fig.update_layout(
        height=300 * rows_needed,
        title_text=fig_title,
        showlegend=False,
    )

    # Rendering is left to the caller.
    # fig.show()

    return fig
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def create_bar_chart(df: pd.DataFrame, title: str = "Chart Title") -> px.bar:
    """
    Create a bar chart using Plotly Express with the first column as x and the
    second column as y.

    Args:
        df (pd.DataFrame): Input DataFrame; column 0 is used as x, column 1 as y
        title (str): Chart title shown above the figure

    Returns:
        fig (px.bar): Bar chart figure (fixed 600x300, values labelled on bars)
    """
    fig = px.bar(
        df,
        x=df.columns[0],
        y=df.columns[1],
        text_auto=True,
        title=title,
        height=300,
        width=600,
    )
    # Force a tick for every distinct x value (no automatic thinning).
    fig.update_xaxes(tickvals=df[df.columns[0]].unique())
    return fig
|
utils/utils_functions.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import warnings
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from geopy.distance import geodesic
|
| 5 |
+
|
| 6 |
+
def calculate_distances(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    code_col1: str,
    lat_col1: str,
    long_col1: str,
    code_col2: str,
    lat_col2: str,
    long_col2: str,
    min_distance: float = 1.0,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Calculate distances between points in two datasets and find closest matches.

    Args:
        df1: First DataFrame containing reference points.
        df2: Second DataFrame containing points to compare.
        code_col1: Column name in df1 containing point identifiers.
        lat_col1: Column name in df1 containing latitude.
        long_col1: Column name in df1 containing longitude.
        code_col2: Column name in df2 containing point identifiers.
        lat_col2: Column name in df2 containing latitude.
        long_col2: Column name in df2 containing longitude.
        min_distance: Distance threshold in kilometers for the third result.

    Returns:
        tuple: (all_distances, closest_matches, matches_below_threshold).
        Every result frame carries all df1 columns, all df2 columns suffixed
        with "_Dataset2", and a "Distance_km" column.

    Raises:
        ValueError: If either frame is missing required columns, or if no
            valid coordinate pair could be processed.
    """
    # Validate input columns up front so failures are explicit.
    required_cols_1 = {code_col1, lat_col1, long_col1}
    required_cols_2 = {code_col2, lat_col2, long_col2}

    if not required_cols_1.issubset(df1.columns):
        raise ValueError(
            f"df1 is missing required columns: {required_cols_1 - set(df1.columns)}"
        )
    if not required_cols_2.issubset(df2.columns):
        raise ValueError(
            f"df2 is missing required columns: {required_cols_2 - set(df2.columns)}"
        )

    # Hoist all per-row work out of the O(n*m) loop: the previous version
    # called df.iloc[...] and .to_dict() for every pair, which is expensive.
    # Extract each frame's rows once and pre-suffix df2's keys.
    rows1 = df1.to_dict("records")
    rows2 = [
        {f"{col}_Dataset2": val for col, val in row.items()}
        for row in df2.to_dict("records")
    ]
    coords1 = list(zip(df1[lat_col1], df1[long_col1]))
    coords2 = list(zip(df2[lat_col2], df2[long_col2]))

    distances = []
    for row1, coord1 in zip(rows1, coords1):
        for row2, coord2 in zip(rows2, coords2):
            try:
                distance_km = geodesic(coord1, coord2).kilometers
            except ValueError as e:
                # geodesic raises ValueError for out-of-range coordinates;
                # skip that pair but keep processing the rest.
                warnings.warn(
                    f"Skipping invalid coordinates: {coord1} or {coord2}: {e}"
                )
                continue
            distances.append({**row1, **row2, "Distance_km": distance_km})

    if not distances:
        raise ValueError("No valid coordinate pairs were processed")

    df_distances = pd.DataFrame(distances)

    # For every df1 point, keep only the single nearest df2 point.
    df_closest = df_distances.loc[
        df_distances.groupby(code_col1)["Distance_km"].idxmin()
    ]

    # All pairs closer than the threshold (e.g. co-located / duplicate sites).
    df_closest_min_distance = df_distances[df_distances["Distance_km"] < min_distance]

    return df_distances, df_closest, df_closest_min_distance
|
utils/utils_vars.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
# Remote copy of the same dataset, kept for reference:
# url = "https://raw.githubusercontent.com/DavMelchi/STORAGE/refs/heads/main/physical_db/physical_database.csv"
# Path to the physical-site database CSV, relative to the app's working directory.
url = r"./physical_db/physical_database.csv"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_physical_db() -> pd.DataFrame:
    """
    Read the physical_database.csv file referenced by the module-level ``url``
    and return a DataFrame restricted to the columns 'Code_Sector', 'Azimut',
    'Longitude', 'Latitude', 'Hauteur', 'City', 'Adresse', 'Commune' and
    'Cercle'.

    Returns:
        pd.DataFrame: A DataFrame containing only the listed columns.

    Raises:
        FileNotFoundError: If the CSV file does not exist at ``url``.
        KeyError: If any of the expected columns is absent from the file.
    """
    # Single source of truth for the columns this helper exposes.
    wanted_columns = [
        "Code_Sector",
        "Azimut",
        "Longitude",
        "Latitude",
        "Hauteur",
        "City",
        "Adresse",
        "Commune",
        "Cercle",
    ]
    physical = pd.read_csv(url)
    return physical[wanted_columns]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class UtilsVars:
    """Shared lookup tables and mutable processing state for the RAN tools.

    NOTE: all attributes are class-level, so the DataFrames/lists below act
    as module-wide shared state — every consumer sees the same objects.
    """

    # Maps raw sector numbers (several numbering schemes) to logical sectors 1-3.
    sector_mapping = {
        4: 1,
        5: 2,
        6: 3,
        11: 1,
        12: 2,
        13: 3,
        71: 1,
        72: 2,
        73: 3,
        81: 1,
        82: 2,
        83: 3,
    }
    # GSM band flag (1 = 1800 MHz, 0 = 900 MHz) to descriptive labels.
    type_cellule = {1: "Macro Cell 1800", 0: "Macro Cell 900"}
    oml_band_frequence = {1: "OML BAND GSM 1800", 0: "OML BAND GSM 900"}
    gsm_band = {1: "G1800", 0: "G900"}
    configuration_schema = {1: "EGPRS 1800", 0: "EGPRS 900"}
    # GSM channel type codes to channel names.
    channeltype_mapping = {4: "BCCH", 3: "TRX_TCH"}
    # LTE band label to OML E-UTRA band description.
    oml_lte_freq_band = {
        "L1800": "OML E-UTRA Band 3 - 20MHz",
        "L800": "OML E-UTRA Band 20 - 20MHz",
        "L2300": "OML E-UTRA Band 43 - 20MHz",
        "L2600": "OML E-UTRA Band 7 - 20MHz",
        "L700": "OML E-UTRA Band 28 - 20MHz",
    }
    # UARFCN carrier numbers to OML UTRA band description.
    porteuse_mapping = {
        3004: "OML UTRA Band VIII",
        3006: "OML UTRA Band VIII",
        10812: "OML UTRA Band I",
        10787: "OML UTRA Band I",
        10837: "OML UTRA Band I",
    }
    # Band label to KML ABGR color code (alpha 0x7f prefix).
    color_mapping = {
        "U900": "7fff0000",
        "U2100": "7f00ff00",
        "G900": "7fff0000",
        "G1800": "7f00ff00",
        "L800": "7fff0000",
        "L1800": "7f00ff00",
        "L2300": "7f00ffff",
        "L2600": "7f0000ff",
        "L700": "7fff00ff",
    }
    # Band label to rendered sector size (display units).
    size_mapping = {
        "U900": 100,
        "U2100": 120,
        "G900": 100,
        "G1800": 120,
        "L800": 120,
        "L1800": 140,
        "L2300": 100,
        "L2600": 90,
        "L700": 80,
    }
    # EARFCN to LTE band label.
    lte_band = {
        1786: "L1800",
        6350: "L800",
        3050: "L2600",
        38750: "L2300",
        1761: "L1800",
        9260: "L700",
    }
    # UARFCN to WCDMA band label.
    wcdma_band = {
        3004: "U900",
        3006: "U900",
        10787: "U2100",
        10837: "U2100",
        10812: "U2100",
    }
    # BSC numeric identifier to BSC name.
    bsc_name = {
        403698: "MBSCTST",
        403699: "MBSC01",
        403701: "MBSC04",
        403702: "MBSC03",
        403703: "MBSC02",
        406283: "MBSKTL01",
        406284: "MBSSEG01",
        406308: "MBSSK0S1",
        406309: "ASBSCMSC3",
    }
    # Mutable processing state filled in by the pipeline at runtime.
    final_lte_database = ""
    final_gsm_database = ""
    final_wcdma_database = ""
    final_trx_database = ""
    final_mrbts_database = ""
    final_invunit_database = ""
    final_mal_database = ""
    gsm_dfs = []
    wcdma_dfs = []
    lte_dfs = []
    all_db_dfs = []
    all_db_dfs_names = []
    final_all_database = None
    atoll_dfs = []
    final_atoll_database = None
    final_nice_database = None
    neighbors_database = ""
    file_path = ""
    gsm_kml_file = None
    wcdma_kml_file = None
    lte_kml_file = None
    adjl_database = None
    # physisal_db = get_physical_db()
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# Recognized LTE band tokens, in the same precedence order as the original
# if/elif chain (the first substring match wins).
_LTE_BANDS = ("L1800", "L2300", "L800", "L2600", "L700")


def get_band(text):
    """
    Extract the LTE band token from the given string.

    Parameters
    ----------
    text : str
        The string to extract the band from.

    Returns
    -------
    str or np.nan
        The first matching band among L1800, L2300, L800, L2600 and L700,
        or NaN if the text was not a string or did not contain any of the
        recognized bands.
    """
    if isinstance(text, str):  # Non-string inputs (NaN, numbers) fall through.
        for band in _LTE_BANDS:
            if band in text:
                return band
    return np.nan  # or return None
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def clean_bands(bands):
    """Drop literal "nan" segments from a "/"-separated band string.

    Returns the cleaned string, or None when *bands* is missing (NaN/None)
    or when nothing remains after filtering.
    """
    if not pd.isna(bands):
        kept = []
        for piece in bands.split("/"):
            if piece != "nan":
                kept.append(piece)
        if kept:
            return "/".join(kept)
    return None
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class GsmAnalysisData:
    """Container for GSM network analysis results.

    All attributes are class-level and mutated in place by the analysis
    pipeline, so the state is shared by every consumer of this class.
    """

    # Scalar counters.
    total_number_of_bsc = 0
    total_number_of_cell = 0
    number_of_site = 0
    # Per-BSC aggregations.
    number_of_cell_per_bsc = pd.DataFrame()
    number_of_site_per_bsc = pd.DataFrame()
    # Data-quality counters (empty-name / empty-BCCH records).
    number_of_bts_name_empty = 0
    number_of_bcf_name_empty = 0
    number_of_bcch_empty = 0
    # Administrative-state and TRX distributions.
    bts_administate_distribution = pd.DataFrame()
    trx_administate_distribution = pd.DataFrame()
    number_of_trx_per_bsc = pd.DataFrame()
    # Per-LAC aggregations.
    number_of_cell_per_lac = pd.DataFrame()
    number_of_site_per_lac = pd.DataFrame()
    trx_frequency_distribution = pd.DataFrame()
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
class WcdmaAnalysisData:
    """Container for WCDMA (3G) network analysis results.

    All attributes are class-level and mutated in place by the analysis
    pipeline, so the state is shared by every consumer of this class.
    """

    # Scalar counters.
    total_number_of_rnc = 0
    total_number_of_wcel = 0
    number_of_site = 0
    number_of_site_per_rnc = 0
    # Per-RNC aggregation.
    number_of_cell_per_rnc = pd.DataFrame()
    # Data-quality counters (empty-name records).
    number_of_empty_wbts_name = 0
    number_of_empty_wcel_name = 0
    # Administrative-state and scrambling-code distributions.
    wcel_administate_distribution = pd.DataFrame()
    psc_distribution = pd.DataFrame()
    # Per-LAC aggregations.
    number_of_cell_per_lac = pd.DataFrame()
    number_of_site_per_lac = pd.DataFrame()
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class LteFddAnalysisData:
    """Container for LTE FDD network analysis results.

    All attributes are class-level and mutated in place by the analysis
    pipeline, so the state is shared by every consumer of this class.
    """

    # Scalar counters.
    total_number_of_lncel = 0
    total_number_of_site = 0
    # Data-quality counters (empty-name records).
    number_of_empty_lncel_name = 0
    number_of_empty_lncel_cellname = 0
    number_of_empty_lnbts_name = 0
    # Per-band and PCI / root-sequence distributions.
    number_of_cell_per_band = pd.DataFrame()
    phycellid_distribution = pd.DataFrame()
    rootsequenceindex_distribution = pd.DataFrame()
    lncel_administate_distribution = pd.DataFrame()
    # Per-TAC aggregation.
    number_of_cell_per_tac = pd.DataFrame()
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
class LteTddAnalysisData:
    """Container for LTE TDD network analysis results.

    Mirrors LteFddAnalysisData attribute-for-attribute; all attributes are
    class-level and mutated in place by the analysis pipeline, so the state
    is shared by every consumer of this class.
    """

    # Scalar counters.
    total_number_of_lncel = 0
    total_number_of_site = 0
    # Data-quality counters (empty-name records).
    number_of_empty_lncel_name = 0
    number_of_empty_lncel_cellname = 0
    number_of_empty_lnbts_name = 0
    # Per-band and PCI / root-sequence distributions.
    number_of_cell_per_band = pd.DataFrame()
    phycellid_distribution = pd.DataFrame()
    rootsequenceindex_distribution = pd.DataFrame()
    lncel_administate_distribution = pd.DataFrame()
    # Per-TAC aggregation.
    number_of_cell_per_tac = pd.DataFrame()
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
class SiteAnalysisData:
    """Container for cross-technology (GSM/WCDMA/LTE) site analysis results.

    All attributes are class-level and mutated in place by the analysis
    pipeline, so the state is shared by every consumer of this class.
    """

    # Scalar counters.
    total_number_of_site = 0
    # NOTE: "munber" is a typo kept as-is — callers reference this name.
    total_munber_of_gsm_site = 0
    total_number_of_wcdma_site = 0
    total_number_of_lte_site = 0
    # Per-technology band distributions.
    gsm_bands_distribution = pd.DataFrame()
    wcdma_bands_distribution = pd.DataFrame()
    lte_bands_distribution = pd.DataFrame()
    all_bands_distribution = pd.DataFrame()
    number_of_trx_per_site_distribution = pd.DataFrame()
|