github-actions committed
Commit a0e03be · 0 parent(s)

Deploy Panel Space
Files changed (43)
  1. Dockerfile +14 -0
  2. README.md +11 -0
  3. data/kpi_health_check_presets/presets_1.json +301 -0
  4. data/kpi_health_check_profiles/Profil_1.json +32 -0
  5. panel_app/convert_to_excel_panel.py +55 -0
  6. panel_app/kpi_health_check_drilldown_plots.py +360 -0
  7. panel_app/kpi_health_check_panel.py +0 -0
  8. panel_app/kpi_health_check_panel_v2.py +0 -0
  9. panel_app/panel_portal.py +121 -0
  10. panel_app/panel_v2_backend.py +128 -0
  11. panel_app/trafic_analysis_panel.py +2459 -0
  12. physical_db/physical_database.csv +0 -0
  13. process_kpi/__init__.py +0 -0
  14. process_kpi/gsm_kpi_requirements.md +47 -0
  15. process_kpi/kpi_health_check/__init__.py +0 -0
  16. process_kpi/kpi_health_check/benchmarks.py +42 -0
  17. process_kpi/kpi_health_check/engine.py +293 -0
  18. process_kpi/kpi_health_check/engine_v2.py +320 -0
  19. process_kpi/kpi_health_check/export.py +264 -0
  20. process_kpi/kpi_health_check/io.py +45 -0
  21. process_kpi/kpi_health_check/kpi_groups.py +96 -0
  22. process_kpi/kpi_health_check/multi_rat.py +253 -0
  23. process_kpi/kpi_health_check/normalization.py +292 -0
  24. process_kpi/kpi_health_check/presets.py +79 -0
  25. process_kpi/kpi_health_check/profiles.py +71 -0
  26. process_kpi/kpi_health_check/rules.py +132 -0
  27. process_kpi/lte_kpi_requirements.md +46 -0
  28. process_kpi/process_gsm_capacity.py +719 -0
  29. process_kpi/process_lcg_capacity.py +286 -0
  30. process_kpi/process_lte_capacity.py +528 -0
  31. process_kpi/process_wbts_capacity.py +312 -0
  32. process_kpi/process_wcel_capacity.py +348 -0
  33. requirements.txt +10 -0
  34. utils/azimuth_validation.py +35 -0
  35. utils/check_sheet_exist.py +90 -0
  36. utils/config_band.py +156 -0
  37. utils/convert_to_excel.py +365 -0
  38. utils/extract_code.py +34 -0
  39. utils/kml_creator.py +79 -0
  40. utils/kpi_analysis_utils.py +666 -0
  41. utils/rnc_bsc_lac_count_chart.py +89 -0
  42. utils/utils_functions.py +126 -0
  43. utils/utils_vars.py +243 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM python:3.11-slim
+
+ RUN useradd -m -u 1000 user
+ WORKDIR /app
+
+ COPY --chown=user:user requirements.txt /app/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
+
+ COPY --chown=user:user . /app
+
+ USER user
+ EXPOSE 7860
+
+ CMD ["panel", "serve", "panel_app/panel_portal.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin=*", "--num-procs", "1", "--log-level", "info"]
README.md ADDED
@@ -0,0 +1,11 @@
+ ---
+ title: KPI Analysis (Panel)
+ emoji: "📊"
+ colorFrom: blue
+ colorTo: red
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ ---
+
+ This Space runs the Panel portal located at `panel_app/panel_portal.py`.
data/kpi_health_check_presets/presets_1.json ADDED
@@ -0,0 +1,301 @@
+ {
+   "name": "Test1",
+   "saved_at": "2025-12-13T13:16:30.212771Z",
+   "rules": [
+     {
+       "RAT": "2G",
+       "KPI": "2G_Carried Traffic",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Data CSSR",
+       "direction": "higher_is_better",
+       "sla": 90.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "FT_2G_SDCCH_Drop_rate_1",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Handover success rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "PS_UL_Load",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "SDCCH real blocking",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH availability ratio",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH call blocking",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TCH_ABIS_FAIL_CALL (c001084)",
+       "direction": "lower_is_better",
+       "sla": 10.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "TRAFFIC_PS DL",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Voice CSSR%_",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "2G",
+       "KPI": "Voice_DCR_OML",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Call Setup Success Rate PS",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Drop Call Rate CS",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G Drop Call Rate - All Data services",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "3G HSUPA_USER_THROUGHPUT_BOTH_MT",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Average RTWP",
+       "direction": "lower_is_better",
+       "sla": -89.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "CS_CALL_RADIO_CONN_LOST (M1006C311)",
+       "direction": "lower_is_better",
+       "sla": 100.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Cell Availability, excluding blocked by user state (BLU)",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "FT_Soft_HO_Success_Rate_1",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "HSDPA congestion rate in Iub",
+       "direction": "lower_is_better",
+       "sla": 10.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "HSDPA_USER_THROUGHPUT",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "IUB_LOSS_CC_FRAME_LOSS_IND (M1022C71)",
+       "direction": "lower_is_better",
+       "sla": 100.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Total CS traffic - Erl",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "Total_Data_Traffic",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_cs_call_setup_success_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_hsdpa_call_setup_succ_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "3G",
+       "KPI": "ft_hsupa_call_setup_succ_rate",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "% MIMO RI 2",
+       "direction": "higher_is_better",
+       "sla": 50.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE CALL SETUP SUCCESS RATE (WITHOUT VOLTE)",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE DL Traffic Volume (GBytes)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "4G/LTE UL Traffic Volume (GBytes)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVE 4G/LTE DL USER THRPUT (ALL)KBnew",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVE 4G/LTE UL USER THRPUT (ALL) (Knew",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVG_RTWP_RX_ANT_1 (M8005C306)",
+       "direction": "lower_is_better",
+       "sla": -800.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "AVG_RTWP_RX_ANT_2 (M8005C307)",
+       "direction": "lower_is_better",
+       "sla": -800.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Avg RRC conn UE",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Avg UE distance",
+       "direction": "higher_is_better",
+       "sla": NaN,
+       "policy": "notify"
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "CSFB_V6",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Call Drop Rate_ 4G New",
+       "direction": "lower_is_better",
+       "sla": 2.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Cell Avail excl BLU",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN Avg PRB usage per TTI DL",
+       "direction": "lower_is_better",
+       "sla": 50.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN E-RAB stp SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN Intra-Freq HO SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "E-UTRAN RRC Paging Discard Ratio",
+       "direction": "lower_is_better",
+       "sla": 1.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Intra eNB HO SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Max PDCP Thr DL (classic eNB)",
+       "direction": "higher_is_better",
+       "sla": NaN
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "Total E-UTRAN RRC conn stp SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     },
+     {
+       "RAT": "LTE",
+       "KPI": "UE-spec log S1 sig conn SR",
+       "direction": "higher_is_better",
+       "sla": 98.0
+     }
+   ]
+ }
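
Note that the bare `NaN` tokens above (rules without an SLA) are not strict JSON; the file is plainly the output of Python's `json` module, which emits and accepts them by default. A minimal reading sketch (standard library only; the NaN/SLA split shown is illustrative, not necessarily the engine's actual logic):

```python
import json
import math

# CPython's json parser accepts bare NaN/Infinity by default,
# mapping them to float("nan") and float("inf").
with open("data/kpi_health_check_presets/presets_1.json", encoding="utf-8") as f:
    preset = json.load(f)

# A NaN SLA presumably means "track this KPI but enforce no threshold".
with_sla = [r for r in preset["rules"] if not math.isnan(r["sla"])]
print(preset["name"], len(preset["rules"]), "rules,", len(with_sla), "with an SLA")
```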
data/kpi_health_check_profiles/Profil_1.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "name": "Profil_1",
+   "saved_at": "2025-12-13T13:16:45.937845Z",
+   "config": {
+     "analysis_range": [
+       null,
+       null
+     ],
+     "baseline_days": 30,
+     "recent_days": 7,
+     "rel_threshold_pct": 10.0,
+     "min_consecutive_days": 3,
+     "min_criticality": 0,
+     "min_anomaly_score": 0,
+     "city_filter": "",
+     "top_rat_filter": [
+       "2G",
+       "3G",
+       "LTE"
+     ],
+     "top_status_filter": [
+       "DEGRADED",
+       "PERSISTENT_DEGRADED"
+     ],
+     "preset_selected": "presets_1.json",
+     "drilldown": {
+       "site_code": 2130,
+       "rat": "LTE",
+       "kpi": ""
+     }
+   }
+ }
panel_app/convert_to_excel_panel.py ADDED
@@ -0,0 +1,55 @@
+ import io
+ import time
+ from typing import Iterable, Sequence
+
+ import pandas as pd
+
+
+ def write_dfs_to_excel(
+     dfs: Sequence[pd.DataFrame],
+     sheet_names: Sequence[str],
+     index: bool = True,
+     profile: dict | None = None,
+ ) -> bytes:
+     """Simple Excel export for Panel.
+
+     Writes the given DataFrames to an in-memory XLSX file and returns the bytes.
+     No Streamlit dependency and no heavy formatting, to keep Panel exports fast
+     and avoid Streamlit runtime warnings.
+     """
+     bytes_io = io.BytesIO()
+     t0 = time.perf_counter() if profile is not None else 0.0
+     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
+         for df, name in zip(dfs, sheet_names):
+             # Ensure we always write a valid DataFrame, even if None was passed
+             safe_df = df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+             t_sheet0 = time.perf_counter() if profile is not None else 0.0
+             safe_df.to_excel(writer, sheet_name=str(name), index=index)
+             t_sheet1 = time.perf_counter() if profile is not None else 0.0
+
+             if profile is not None:
+                 sheets = profile.get("excel_sheets")
+                 if not isinstance(sheets, list):
+                     sheets = []
+                     profile["excel_sheets"] = sheets
+                 try:
+                     rows = int(len(safe_df))
+                 except Exception:  # noqa: BLE001
+                     rows = 0
+                 try:
+                     cols = int(safe_df.shape[1])
+                 except Exception:  # noqa: BLE001
+                     cols = 0
+                 sheets.append(
+                     {
+                         "name": str(name),
+                         "rows": rows,
+                         "cols": cols,
+                         "seconds": float(t_sheet1 - t_sheet0),
+                     }
+                 )
+
+     if profile is not None:
+         profile["excel_total_seconds"] = float(time.perf_counter() - t0)
+
+     return bytes_io.getvalue()
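
A minimal usage sketch for `write_dfs_to_excel`; the frames and output path are invented for illustration. Passing a dict as `profile` collects per-sheet timings, which is how a caller can report slow exports:

```python
import pandas as pd

from panel_app.convert_to_excel_panel import write_dfs_to_excel

summary = pd.DataFrame({"code": [2130], "total_voice_trafic": [12.5]})
detail = pd.DataFrame({"code": [2130, 2130], "day": ["2025-12-01", "2025-12-02"]})

profile: dict = {}  # optional timing sink; pass None to skip profiling
xlsx = write_dfs_to_excel(
    [summary, detail], ["Summary", "Detail"], index=False, profile=profile
)

with open("export.xlsx", "wb") as f:
    f.write(xlsx)
print(profile["excel_total_seconds"], [s["name"] for s in profile["excel_sheets"]])
```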
panel_app/kpi_health_check_drilldown_plots.py ADDED
@@ -0,0 +1,360 @@
+ from datetime import timedelta
+
+ import pandas as pd
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+
+ from process_kpi.kpi_health_check.engine import is_bad
+
+
+ def build_drilldown_plot(
+     df: pd.DataFrame,
+     kpis: list[str],
+     rules_df: pd.DataFrame | None = None,
+     highlight_bad_days: bool = True,
+     show_sla: bool = True,
+     site_code: str | int = "",
+     rat: str = "",
+     main_kpi: str | None = None,
+     baseline_days_n: int = 30,
+     recent_days_n: int = 7,
+     rel_threshold_pct: float = 10.0,
+     normalization: str = "None",
+     granularity: str = "Daily",
+ ) -> go.Figure | None:
+     """
+     Builds the drill-down trend plot with native Plotly annotations.
+     """
+     if df is None or df.empty or not kpis:
+         return None
+
+     # Filter columns that exist
+     valid_kpis = [k for k in kpis if k in df.columns]
+     if not valid_kpis:
+         return None
+
+     g = str(granularity or "Daily").strip().lower()
+     is_hourly = g.startswith("hour") or g.startswith("h")
+     time_col = (
+         "period_start" if (is_hourly and "period_start" in df.columns) else "date_only"
+     )
+
+     plot_df = df.sort_values(time_col).copy()
+
+     try:
+         plot_df[time_col] = pd.to_datetime(plot_df[time_col])
+     except Exception:
+         pass
+
+     if main_kpi is None and valid_kpis:
+         main_kpi = valid_kpis[0]
+
+     title_text = f"{rat} - Site {site_code}"
+     # If single KPI, be explicit in title
+     if len(valid_kpis) == 1:
+         title_text = f"{rat} - Site {site_code} - {valid_kpis[0]}"
+
+     # Subplot for Timeline (Streak) - Row 2
+     # Row 1: Main Trend
+     fig = make_subplots(
+         rows=2,
+         cols=1,
+         shared_xaxes=True,
+         vertical_spacing=0.05,
+         row_heights=[0.85, 0.15],
+         subplot_titles=(title_text, "Status Check"),
+     )
+
+     norm_mode = str(normalization or "None").strip()
+     do_norm = (norm_mode != "None") and (len(valid_kpis) > 1)
+
+     n_kpis = len(valid_kpis)
+     trace_mode = "lines+markers" if n_kpis <= 3 else "lines"
+     marker_size = 6 if n_kpis <= 3 else 0
+
+     for kpi in valid_kpis:
+         # Data preparation
+         x_data = plot_df[time_col]
+         y_data = pd.to_numeric(plot_df[kpi], errors="coerce")
+         if do_norm:
+             if norm_mode == "Min-Max":
+                 vmin = y_data.min(skipna=True)
+                 vmax = y_data.max(skipna=True)
+                 if pd.notna(vmin) and pd.notna(vmax) and float(vmax) != float(vmin):
+                     y_data = (y_data - vmin) / (vmax - vmin)
+                 else:
+                     y_data = y_data * 0.0
+             elif norm_mode == "Z-score":
+                 mu = y_data.mean(skipna=True)
+                 sd = y_data.std(skipna=True)
+                 if pd.notna(sd) and float(sd) != 0.0:
+                     y_data = (y_data - mu) / sd
+                 else:
+                     y_data = y_data * 0.0
+
+         # Add Trace
+         fig.add_trace(
+             go.Scatter(
+                 x=x_data,
+                 y=y_data,
+                 mode=trace_mode,
+                 name=kpi,
+                 legendgroup=kpi,  # Allows grouping logic if needed
+                 marker=dict(size=marker_size) if marker_size else None,
+             ),
+             row=1,
+             col=1,
+         )
+
+         # Add SLA line if available
+         if show_sla and rules_df is not None:
+             try:
+                 if do_norm:
+                     continue
+                 # Find rule for this KPI
+                 # Note: This implies rules_df is filtered for the correct RAT
+                 rule = rules_df[rules_df["KPI"] == kpi]
+                 if not rule.empty:
+                     pol = (
+                         str(rule.iloc[0].get("policy", "enforce") or "enforce")
+                         .strip()
+                         .lower()
+                     )
+                     if pol == "notify":
+                         continue
+                     if len(valid_kpis) > 1 and str(kpi) != str(main_kpi):
+                         continue
+                     sla_val = pd.to_numeric(rule.iloc[0].get("sla"), errors="coerce")
+                     if pd.notna(sla_val):
+                         fig.add_hline(
+                             y=sla_val,
+                             line_dash="dot",
+                             line_color="red",
+                             annotation_text=f"SLA {kpi}",
+                             annotation_position="bottom right",
+                             row=1,
+                             col=1,
+                         )
+             except Exception:
+                 pass
+
+     try:
+         if highlight_bad_days and main_kpi and main_kpi in plot_df.columns:
+             direction = "higher_is_better"
+             policy = "enforce"
+             sla_eval = None
+             if (
+                 rules_df is not None
+                 and isinstance(rules_df, pd.DataFrame)
+                 and not rules_df.empty
+             ):
+                 rule = rules_df[rules_df["KPI"] == str(main_kpi)]
+                 if not rule.empty:
+                     direction = str(
+                         rule.iloc[0].get("direction", direction) or direction
+                     )
+                     policy = (
+                         str(rule.iloc[0].get("policy", policy) or policy)
+                         .strip()
+                         .lower()
+                     )
+                     if policy != "notify":
+                         sla_val = pd.to_numeric(
+                             rule.iloc[0].get("sla"), errors="coerce"
+                         )
+                         if pd.notna(sla_val):
+                             try:
+                                 sla_eval = float(sla_val)
+                             except Exception:
+                                 sla_eval = None
+
+             end_dt = pd.to_datetime(plot_df[time_col]).max()
+             if is_hourly:
+                 rs = end_dt - timedelta(hours=max(int(recent_days_n), 1) * 24 - 1)
+                 be = rs - timedelta(hours=1)
+                 bs = be - timedelta(hours=max(int(baseline_days_n), 1) * 24 - 1)
+             else:
+                 rs = end_dt - timedelta(days=max(int(recent_days_n), 1) - 1)
+                 be = rs - timedelta(days=1)
+                 bs = be - timedelta(days=max(int(baseline_days_n), 1) - 1)
+
+             dts = pd.to_datetime(plot_df[time_col])
+             baseline_mask = (dts >= bs) & (dts <= be)
+             recent_mask = (dts >= rs) & (dts <= end_dt)
+             baseline_val = pd.to_numeric(
+                 plot_df.loc[baseline_mask, str(main_kpi)], errors="coerce"
+             ).median()
+             baseline_val = float(baseline_val) if pd.notna(baseline_val) else None
+
+             vals = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
+             bad_flags = [
+                 is_bad(
+                     float(v) if pd.notna(v) else None,
+                     baseline_val,
+                     direction,
+                     float(rel_threshold_pct),
+                     sla_eval,
+                 )
+                 for v in vals.tolist()
+             ]
+
+             bad_recent = [bool(b) and bool(r) for b, r in zip(bad_flags, recent_mask)]
+
+             baseline_color = "#bdbdbd"
+             ok_color = "#1565c0"
+             bad_color = "#f9a825" if policy == "notify" else "#e53935"
+
+             colors = []
+             hover_txt = []
+             for is_b, is_base, is_recent in zip(bad_flags, baseline_mask, recent_mask):
+                 if bool(is_base):
+                     colors.append(baseline_color)
+                     hover_txt.append(f"BASELINE ({main_kpi})")
+                 elif bool(is_recent) and bool(is_b):
+                     colors.append(bad_color)
+                     hover_txt.append(f"RECENT BAD ({main_kpi})")
+                 elif bool(is_recent):
+                     colors.append(ok_color)
+                     hover_txt.append(f"RECENT OK ({main_kpi})")
+                 else:
+                     colors.append("#e0e0e0")
+                     hover_txt.append(f"OUTSIDE WINDOW ({main_kpi})")
+             fig.add_trace(
+                 go.Scatter(
+                     x=plot_df[time_col],
+                     y=[0] * len(plot_df),
+                     mode="markers",
+                     marker=dict(symbol="square", size=10, color=colors),
+                     showlegend=False,
+                     hovertext=hover_txt,
+                     hoverinfo="text",
+                 ),
+                 row=2,
+                 col=1,
+             )
+
+             try:
+                 y_main = pd.to_numeric(plot_df[str(main_kpi)], errors="coerce")
+                 if do_norm:
+                     if norm_mode == "Min-Max":
+                         vmin = y_main.min(skipna=True)
+                         vmax = y_main.max(skipna=True)
+                         if (
+                             pd.notna(vmin)
+                             and pd.notna(vmax)
+                             and float(vmax) != float(vmin)
+                         ):
+                             y_main = (y_main - vmin) / (vmax - vmin)
+                         else:
+                             y_main = y_main * 0.0
+                     elif norm_mode == "Z-score":
+                         mu = y_main.mean(skipna=True)
+                         sd = y_main.std(skipna=True)
+                         if pd.notna(sd) and float(sd) != 0.0:
+                             y_main = (y_main - mu) / sd
+                         else:
+                             y_main = y_main * 0.0
+
+                 idx_bad = [i for i, b in enumerate(bad_recent) if bool(b)]
+                 if idx_bad:
+                     fig.add_trace(
+                         go.Scatter(
+                             x=[plot_df[time_col].iloc[i] for i in idx_bad],
+                             y=[y_main.iloc[i] for i in idx_bad],
+                             mode="markers",
+                             marker=dict(size=10, color=bad_color, symbol="circle"),
+                             name="Bad days",
+                             showlegend=(n_kpis <= 3),
+                         ),
+                         row=1,
+                         col=1,
+                     )
+             except Exception:
+                 pass
+     except Exception:
+         pass
+
+     if not plot_df.empty and not highlight_bad_days:
+         fig.add_trace(
+             go.Scatter(
+                 x=plot_df[time_col],
+                 y=[0] * len(plot_df),
+                 mode="markers",
+                 opacity=0,
+                 showlegend=False,
+                 hoverinfo="skip",
+             ),
+             row=2,
+             col=1,
+         )
+
+     fig.update_layout(
+         template="plotly_white",
+         height=500,
+         margin=dict(l=50, r=50, t=50, b=50),
+         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
+         hovermode="x unified",
+     )
+
+     try:
+         force_all_dates = False
+         try:
+             x_min = pd.to_datetime(plot_df[time_col]).min()
+             x_max = pd.to_datetime(plot_df[time_col]).max()
+             span_days = int((x_max - x_min).days) + 1
+             n_dates = int(pd.to_datetime(plot_df[time_col]).nunique())
+             force_all_dates = (span_days <= 200) and (n_dates <= 200)
+         except Exception:
+             force_all_dates = False
+
+         if do_norm:
+             fig.update_yaxes(title_text=f"Normalized ({norm_mode})", row=1, col=1)
+         else:
+             fig.update_yaxes(title_text="Value", row=1, col=1)
+         fig.update_yaxes(
+             showticklabels=False,
+             showgrid=False,
+             zeroline=False,
+             range=[-1, 1],
+             row=2,
+             col=1,
+         )
+
+         if force_all_dates and not is_hourly:
+             fig.update_xaxes(
+                 tickmode="linear",
+                 dtick=86400000,
+                 tickformat="%d-%b",
+                 tickangle=-90,
+                 tickfont=dict(size=10),
+                 automargin=True,
+                 ticks="outside",
+                 ticklen=6,
+                 showgrid=True,
+                 row=2,
+                 col=1,
+             )
+         else:
+             fig.update_xaxes(
+                 tickangle=-45,
+                 automargin=True,
+                 ticks="outside",
+                 ticklen=6,
+                 showgrid=True,
+                 tickformatstops=[
+                     {
+                         "dtickrange": [None, 86400000],
+                         "value": "%d-%b\n%H:%M" if is_hourly else "%d-%b\n%Y",
+                     },
+                     {"dtickrange": [86400000, 7 * 86400000], "value": "%d-%b"},
+                     {"dtickrange": [7 * 86400000, "M1"], "value": "%d-%b"},
+                     {"dtickrange": ["M1", "M12"], "value": "%b\n%Y"},
+                     {"dtickrange": ["M12", None], "value": "%Y"},
+                 ],
+                 row=2,
+                 col=1,
+             )
+     except Exception:
+         pass
+
+     return fig
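
A toy invocation of `build_drilldown_plot`, assuming the repository root is on `sys.path`. The column names follow the function's expectations (a `date_only` time column plus one column per KPI), and the series is shaped so the last week breaches the SLA:

```python
import pandas as pd

from panel_app.kpi_health_check_drilldown_plots import build_drilldown_plot

df = pd.DataFrame(
    {
        "date_only": pd.date_range("2025-11-01", periods=40, freq="D"),
        # 33 healthy days, then a 7-day dip below the 98.0 SLA
        "Cell Avail excl BLU": [99.0] * 33 + [91.0] * 7,
    }
)
rules = pd.DataFrame(
    [{"KPI": "Cell Avail excl BLU", "direction": "higher_is_better", "sla": 98.0}]
)

fig = build_drilldown_plot(
    df,
    kpis=["Cell Avail excl BLU"],
    rules_df=rules,
    site_code=2130,
    rat="LTE",
)
if fig is not None:
    fig.show()
```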
panel_app/kpi_health_check_panel.py ADDED
The diff for this file is too large to render. See raw diff
 
panel_app/kpi_health_check_panel_v2.py ADDED
The diff for this file is too large to render. See raw diff
 
panel_app/panel_portal.py ADDED
@@ -0,0 +1,121 @@
+ import os
+ import sys
+
+ import panel as pn
+
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ if ROOT_DIR not in sys.path:
+     sys.path.insert(0, ROOT_DIR)
+
+ pn.extension("plotly", "tabulator")
+
+ # Import pages (kept as modules, not nested templates)
+ from panel_app import (
+     kpi_health_check_panel,
+     kpi_health_check_panel_v2,
+     trafic_analysis_panel,
+ )
+
+ PAGES = {
+     "📊 Global Traffic Analysis": {
+         "get_components": trafic_analysis_panel.get_page_components,
+         "description": "Multi-RAT traffic analysis + maps + exports.",
+     },
+     "📈 KPI Health Check": {
+         "get_components": kpi_health_check_panel.get_page_components,
+         "description": "Detection of degraded/persistent/resolved KPIs + drill-down + export.",
+     },
+     "⚡ KPI Health Check (V2)": {
+         "get_components": kpi_health_check_panel_v2.get_page_components,
+         "description": "Optimized version (disk cache + vectorized health-check engine).",
+     },
+ }
+
+ HOME_PAGE = "🏠 Gallery"
+
+ page_sidebar_container = pn.Column(sizing_mode="stretch_width")
+ page_main_container = pn.Column(sizing_mode="stretch_both")
+
+ page_title = pn.pane.Markdown("", sizing_mode="stretch_width")
+ back_button = pn.widgets.Button(
+     name="← Back to gallery",
+     button_type="primary",
+     width=180,
+ )
+
+ home_button = pn.widgets.Button(
+     name=HOME_PAGE,
+     button_type="default",
+     width_policy="max",
+ )
+
+
+ def _load_page(page_name: str) -> None:
+     if page_name == HOME_PAGE:
+         page_title.object = "## Applications"
+
+         tiles = []
+         for title, meta in PAGES.items():
+             btn = pn.widgets.Button(name="Open", button_type="primary", width=120)
+             btn.on_click(lambda e, t=title: _load_page(t))
+
+             tile = pn.Column(
+                 pn.pane.Markdown(f"### {title}\n\n{meta.get('description', '')}"),
+                 btn,
+                 sizing_mode="stretch_width",
+                 margin=(10, 10, 10, 10),
+             )
+             tiles.append(tile)
+
+         gallery = pn.GridBox(*tiles, ncols=2, sizing_mode="stretch_width")
+         page_sidebar_container.objects = [
+             pn.pane.Markdown(
+                 """### Welcome\n\nPick an application from the gallery."""
+             )
+         ]
+         page_main_container.objects = [page_title, gallery]
+         return
+
+     meta = PAGES.get(page_name)
+     if meta is None:
+         page_sidebar_container.objects = [
+             pn.pane.Alert("Unknown page", alert_type="danger")
+         ]
+         page_main_container.objects = []
+         return
+
+     sidebar, main = meta["get_components"]()
+     page_title.object = f"## {page_name}"
+     page_sidebar_container.objects = [sidebar]
+     page_main_container.objects = [
+         pn.Row(back_button, pn.Spacer(), sizing_mode="stretch_width"),
+         page_title,
+         main,
+     ]
+
+
+ template = pn.template.MaterialTemplate(title="OML DB - Portal")
+
+
+ def _go_home(event=None) -> None:
+     _load_page(HOME_PAGE)
+
+
+ back_button.on_click(_go_home)
+ home_button.on_click(_go_home)
+
+ _load_page(HOME_PAGE)
+
+ template.sidebar.append(
+     pn.Column(
+         pn.pane.Markdown("## Navigation"),
+         home_button,
+         pn.layout.Divider(),
+         page_sidebar_container,
+         sizing_mode="stretch_width",
+     )
+ )
+
+ template.main.append(page_main_container)
+
+ template.servable()
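
The portal's only contract with a page is `get_page_components()` returning a `(sidebar, main)` pair, which `_load_page` unpacks. A hypothetical page module that would slot into `PAGES` (all names invented for illustration):

```python
import panel as pn


def get_page_components():
    # Sidebar widget drives the main view through pn.bind.
    days = pn.widgets.IntSlider(name="Days", start=1, end=30, value=7)
    sidebar = pn.Column("### Options", days)
    main = pn.Column(pn.bind(lambda d: f"Showing the last {d} days", days))
    return sidebar, main
```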
panel_app/panel_v2_backend.py ADDED
@@ -0,0 +1,128 @@
+ import hashlib
+ import os
+ from dataclasses import dataclass
+
+ import pandas as pd
+
+
+ def _project_root() -> str:
+     return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+ def cache_root() -> str:
+     # Priority: env var > /tmp (for HF Spaces/containers) > project root (local dev)
+     env_cache = os.environ.get("CACHE_DIR")
+     if env_cache:
+         path = os.path.join(env_cache, "panel_app_v2")
+     elif os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
+         # On Hugging Face Spaces and Linux containers, /tmp is always writable
+         path = os.path.join("/tmp", "panel_app_v2_cache")
+     else:
+         # Fallback to project root for local development
+         root = _project_root()
+         path = os.path.join(root, ".cache", "panel_app_v2")
+     os.makedirs(path, exist_ok=True)
+     return path
+
+
+ def _safe_str(value: object) -> str:
+     try:
+         return str(value or "")
+     except Exception:
+         return ""
+
+
+ def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
+     h = hashlib.blake2b(digest_size=16)
+     if file_bytes:
+         h.update(file_bytes)
+     name = _safe_str(filename)
+     if name:
+         h.update(name.encode("utf-8", errors="ignore"))
+     ex = _safe_str(extra)
+     if ex:
+         h.update(ex.encode("utf-8", errors="ignore"))
+     return h.hexdigest()
+
+
+ def _has_pyarrow() -> bool:
+     try:
+         import pyarrow  # noqa: F401
+
+         return True
+     except Exception:
+         return False
+
+
+ def _has_duckdb() -> bool:
+     try:
+         import duckdb  # noqa: F401
+
+         return True
+     except Exception:
+         return False
+
+
+ def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
+     if _has_pyarrow():
+         path = path_no_ext + ".parquet"
+         df.to_parquet(path, index=False)
+         return path
+     path = path_no_ext + ".pkl"
+     df.to_pickle(path)
+     return path
+
+
+ def read_table(path: str) -> pd.DataFrame:
+     if not path or not os.path.exists(path):
+         return pd.DataFrame()
+     p = str(path).lower()
+     if p.endswith(".parquet"):
+         return pd.read_parquet(path)
+     return pd.read_pickle(path)
+
+
+ @dataclass(frozen=True)
+ class CachedDataset:
+     dataset_id: str
+     rat: str
+     granularity: str
+
+     def base_dir(self) -> str:
+         return os.path.join(
+             cache_root(),
+             self.dataset_id,
+             f"rat={self.rat}",
+             f"granularity={self.granularity}",
+         )
+
+     def daily_table_base(self) -> str:
+         return os.path.join(self.base_dir(), "daily")
+
+     def meta_path(self) -> str:
+         return os.path.join(self.base_dir(), "meta.json")
+
+
+ def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
+     base = dataset.daily_table_base()
+     candidates = [base + ".parquet", base + ".pkl"]
+     for p in candidates:
+         if os.path.exists(p):
+             try:
+                 df = read_table(p)
+                 return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
+             except Exception:
+                 return pd.DataFrame()
+     return None
+
+
+ def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
+     os.makedirs(dataset.base_dir(), exist_ok=True)
+     return write_table(daily, dataset.daily_table_base())
+
+
+ def ensure_duckdb_available() -> None:
+     if not _has_duckdb():
+         raise RuntimeError(
+             "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
+         )
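
A round-trip sketch for the V2 disk cache, assuming the fingerprint of the uploaded bytes serves as the dataset id so the cache invalidates whenever the input file changes (the bytes and frame below are placeholders):

```python
import pandas as pd

from panel_app.panel_v2_backend import (
    CachedDataset,
    fingerprint_bytes,
    save_cached_daily,
    try_load_cached_daily,
)

raw = b"...uploaded CSV bytes..."  # placeholder for the real upload
dataset = CachedDataset(
    dataset_id=fingerprint_bytes(raw, filename="lte_daily.csv"),
    rat="LTE",
    granularity="Daily",
)

daily = try_load_cached_daily(dataset)
if daily is None:  # cache miss: compute the daily table, then persist it
    daily = pd.DataFrame({"code": [2130], "lte_data_trafic": [1.2]})
    save_cached_daily(dataset, daily)
```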
panel_app/trafic_analysis_panel.py ADDED
@@ -0,0 +1,2459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import sys
4
+ import zipfile
5
+ from datetime import date, datetime, timedelta
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import panel as pn
10
+ import plotly.express as px
11
+
12
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
13
+ if ROOT_DIR not in sys.path:
14
+ sys.path.insert(0, ROOT_DIR)
15
+
16
+ from panel_app.convert_to_excel_panel import write_dfs_to_excel
17
+ from utils.utils_vars import get_physical_db
18
+
19
+ pn.extension(
20
+ "plotly",
21
+ "tabulator",
22
+ raw_css=[
23
+ ":fullscreen { background-color: white; overflow: auto; }",
24
+ "::backdrop { background-color: white; }",
25
+ ".plot-fullscreen-wrapper:fullscreen { padding: 20px; display: flex; flex-direction: column; }",
26
+ ".plot-fullscreen-wrapper:fullscreen > * { height: 100% !important; width: 100% !important; }",
27
+ ],
28
+ )
29
+
30
+
31
+ def read_fileinput_to_df(file_input: pn.widgets.FileInput) -> pd.DataFrame | None:
32
+ """Read a Panel FileInput (ZIP or CSV) into a DataFrame.
33
+
34
+ Returns None if no file is provided.
35
+ """
36
+ if file_input is None or not file_input.value:
37
+ return None
38
+
39
+ filename = (file_input.filename or "").lower()
40
+ data = io.BytesIO(file_input.value)
41
+
42
+ if filename.endswith(".zip"):
43
+ with zipfile.ZipFile(data) as z:
44
+ csv_files = [f for f in z.namelist() if f.lower().endswith(".csv")]
45
+ if not csv_files:
46
+ raise ValueError("No CSV file found in the ZIP archive")
47
+ with z.open(csv_files[0]) as f:
48
+ return pd.read_csv(f, encoding="latin1", sep=";", low_memory=False)
49
+ elif filename.endswith(".csv"):
50
+ return pd.read_csv(data, encoding="latin1", sep=";", low_memory=False)
51
+ else:
52
+ raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
53
+
54
+
55
+ def extract_code(name):
56
+ name = name.replace(" ", "_") if isinstance(name, str) else None
57
+ if name and len(name) >= 10:
58
+ try:
59
+ return int(name.split("_")[0])
60
+ except ValueError:
61
+ return None
62
+ return None
63
+
64
+
65
+ def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
66
+ df = df[df["BCF name"].str.len() >= 10].copy()
67
+ df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
68
+ df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
69
+ df["code"] = df["BCF name"].apply(extract_code)
70
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
71
+ df = df[df["code"].notna()]
72
+ df["code"] = df["code"].astype(int)
73
+ date_format = (
74
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
75
+ )
76
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
77
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
78
+
79
+ if "TCH availability ratio" in df.columns:
80
+ df["2g_tch_avail"] = pd.to_numeric(
81
+ df["TCH availability ratio"], errors="coerce"
82
+ )
83
+
84
+ agg_dict = {
85
+ "2g_data_trafic": "sum",
86
+ "2g_voice_trafic": "sum",
87
+ }
88
+ if "2g_tch_avail" in df.columns:
89
+ agg_dict["2g_tch_avail"] = "mean"
90
+
91
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
92
+ return df
93
+
94
+
95
+ def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
96
+ df = df[df["WBTS name"].str.len() >= 10].copy()
97
+ df["code"] = df["WBTS name"].apply(extract_code)
98
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
99
+ df = df[df["code"].notna()]
100
+ df["code"] = df["code"].astype(int)
101
+ date_format = (
102
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
103
+ )
104
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
105
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
106
+ df.rename(
107
+ columns={
108
+ "Total CS traffic - Erl": "3g_voice_trafic",
109
+ "Total_Data_Traffic": "3g_data_trafic",
110
+ },
111
+ inplace=True,
112
+ )
113
+
114
+ kpi_col = None
115
+ for col in df.columns:
116
+ if "cell availability" in str(col).lower():
117
+ kpi_col = col
118
+ break
119
+
120
+ if kpi_col is not None:
121
+ df["3g_cell_avail"] = pd.to_numeric(df[kpi_col], errors="coerce")
122
+
123
+ agg_dict = {
124
+ "3g_voice_trafic": "sum",
125
+ "3g_data_trafic": "sum",
126
+ }
127
+ if "3g_cell_avail" in df.columns:
128
+ agg_dict["3g_cell_avail"] = "mean"
129
+
130
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
131
+ return df
132
+
133
+
134
+ def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
135
+ df = df[df["LNBTS name"].str.len() >= 10].copy()
136
+ df["lte_data_trafic"] = (
137
+ df["4G/LTE DL Traffic Volume (GBytes)"]
138
+ + df["4G/LTE UL Traffic Volume (GBytes)"]
139
+ )
140
+ df["code"] = df["LNBTS name"].apply(extract_code)
141
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
142
+ df = df[df["code"].notna()]
143
+ df["code"] = df["code"].astype(int)
144
+ date_format = (
145
+ "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
146
+ )
147
+ df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
148
+ df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
149
+ if "Cell Avail excl BLU" in df.columns:
150
+ df["lte_cell_avail"] = pd.to_numeric(df["Cell Avail excl BLU"], errors="coerce")
151
+
152
+ agg_dict = {"lte_data_trafic": "sum"}
153
+ if "lte_cell_avail" in df.columns:
154
+ agg_dict["lte_cell_avail"] = "mean"
155
+
156
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
157
+ return df
158
+
159
+
160
+ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
161
+ physical_db = get_physical_db()
162
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
163
+ physical_db["code"] = (
164
+ pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
165
+ )
166
+ physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
167
+ physical_db = physical_db.drop_duplicates(subset="code")
168
+
169
+ df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
170
+ df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")
171
+
172
+ for col in [
173
+ "2g_data_trafic",
174
+ "2g_voice_trafic",
175
+ "3g_voice_trafic",
176
+ "3g_data_trafic",
177
+ "lte_data_trafic",
178
+ ]:
179
+ if col not in df:
180
+ df[col] = 0
181
+
182
+ kpi_masks = {}
183
+ for kpi_col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]:
184
+ if kpi_col in df.columns:
185
+ kpi_masks[kpi_col] = df[kpi_col].notna()
186
+
187
+ df.fillna(0, inplace=True)
188
+
189
+ for kpi_col, mask in kpi_masks.items():
190
+ df.loc[~mask, kpi_col] = np.nan
191
+
192
+ df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
193
+ df["total_data_trafic"] = (
194
+ df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
195
+ )
196
+ df = pd.merge(df, physical_db, on=["code"], how="left")
197
+
198
+ pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
199
+ post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
200
+ last_period_start, last_period_end = (
201
+ pd.to_datetime(last_period_range[0]),
202
+ pd.to_datetime(last_period_range[1]),
203
+ )
204
+
205
+ last_period = df[
206
+ (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
207
+ ]
208
+
209
+ def assign_period(x):
210
+ if pre_start <= x <= pre_end:
211
+ return "pre"
212
+ if post_start <= x <= post_end:
213
+ return "post"
214
+ return "other"
215
+
216
+ df["period"] = df["date"].apply(assign_period)
217
+
218
+ comparison = df[df["period"].isin(["pre", "post"])]
219
+
220
+ sum_pivot = (
221
+ comparison.groupby(["code", "period"])[
222
+ ["total_voice_trafic", "total_data_trafic"]
223
+ ]
224
+ .sum()
225
+ .unstack()
226
+ )
227
+ sum_pivot.columns = [f"{metric}_{period}" for metric, period in sum_pivot.columns]
228
+ sum_pivot = sum_pivot.reset_index()
229
+
230
+ sum_pivot["total_voice_trafic_diff"] = (
231
+ sum_pivot["total_voice_trafic_post"] - sum_pivot["total_voice_trafic_pre"]
232
+ )
233
+ sum_pivot["total_data_trafic_diff"] = (
234
+ sum_pivot["total_data_trafic_post"] - sum_pivot["total_data_trafic_pre"]
235
+ )
236
+
237
+ for metric in ["total_voice_trafic", "total_data_trafic"]:
238
+ sum_pivot[f"{metric}_diff_pct"] = (
239
+ (sum_pivot.get(f"{metric}_post", 0) - sum_pivot.get(f"{metric}_pre", 0))
240
+ / sum_pivot.get(f"{metric}_pre", 1)
241
+ ) * 100
242
+
243
+ sum_order = [
244
+ "code",
245
+ "total_voice_trafic_pre",
246
+ "total_voice_trafic_post",
247
+ "total_voice_trafic_diff",
248
+ "total_voice_trafic_diff_pct",
249
+ "total_data_trafic_pre",
250
+ "total_data_trafic_post",
251
+ "total_data_trafic_diff",
252
+ "total_data_trafic_diff_pct",
253
+ ]
254
+ sum_existing_cols = [col for col in sum_order if col in sum_pivot.columns]
255
+ sum_remaining_cols = [
256
+ col for col in sum_pivot.columns if col not in sum_existing_cols
257
+ ]
258
+ sum_pivot = sum_pivot[sum_existing_cols + sum_remaining_cols]
259
+
260
+ avg_pivot = (
261
+ comparison.groupby(["code", "period"])[
262
+ ["total_voice_trafic", "total_data_trafic"]
263
+ ]
264
+ .mean()
265
+ .unstack()
266
+ )
267
+ avg_pivot.columns = [f"{metric}_{period}" for metric, period in avg_pivot.columns]
268
+ avg_pivot = avg_pivot.reset_index()
269
+
270
+ avg_pivot["total_voice_trafic_diff"] = (
271
+ avg_pivot["total_voice_trafic_post"] - avg_pivot["total_voice_trafic_pre"]
272
+ )
273
+ avg_pivot["total_data_trafic_diff"] = (
274
+ avg_pivot["total_data_trafic_post"] - avg_pivot["total_data_trafic_pre"]
275
+ )
276
+
277
+ for metric in ["total_voice_trafic", "total_data_trafic"]:
278
+ avg_pivot[f"{metric}_diff_pct"] = (
279
+ (avg_pivot.get(f"{metric}_post", 0) - avg_pivot.get(f"{metric}_pre", 0))
280
+ / avg_pivot.get(f"{metric}_pre", 1)
281
+ ) * 100
282
+
283
+ avg_pivot = avg_pivot.rename(
284
+ columns={
285
+ "total_voice_trafic_pre": "avg_voice_trafic_pre",
286
+ "total_voice_trafic_post": "avg_voice_trafic_post",
287
+ "total_voice_trafic_diff": "avg_voice_trafic_diff",
288
+ "total_voice_trafic_diff_pct": "avg_voice_trafic_diff_pct",
289
+ "total_data_trafic_pre": "avg_data_trafic_pre",
290
+ "total_data_trafic_post": "avg_data_trafic_post",
291
+ "total_data_trafic_diff": "avg_data_trafic_diff",
292
+ "total_data_trafic_diff_pct": "avg_data_trafic_diff_pct",
293
+ }
294
+ )
295
+
296
+ avg_order = [
297
+ "code",
298
+ "avg_voice_trafic_pre",
299
+ "avg_voice_trafic_post",
300
+ "avg_voice_trafic_diff",
301
+ "avg_voice_trafic_diff_pct",
302
+ "avg_data_trafic_pre",
303
+ "avg_data_trafic_post",
304
+ "avg_data_trafic_diff",
305
+ "avg_data_trafic_diff_pct",
306
+ ]
307
+ avg_existing_cols = [col for col in avg_order if col in avg_pivot.columns]
308
+ avg_remaining_cols = [
309
+ col for col in avg_pivot.columns if col not in avg_existing_cols
310
+ ]
311
+ avg_pivot = avg_pivot[avg_existing_cols + avg_remaining_cols]
312
+
313
+ return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
314
+
315
+
316
+ def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
317
+ avail_col = "2g_tch_avail"
318
+
319
+ if avail_col not in df.columns or "period" not in df.columns:
320
+ return None, None
321
+
322
+ df_2g = df[df[avail_col].notna()].copy()
323
+ df_2g = df_2g[df_2g["period"].isin(["pre", "post"])]
324
+
325
+ if df_2g.empty:
326
+ return None, None
327
+
328
+ site_pivot = df_2g.groupby(["code", "period"])[avail_col].mean().unstack()
329
+
330
+ site_pivot = site_pivot.rename(
331
+ columns={"pre": "tch_avail_pre", "post": "tch_avail_post"}
332
+ )
333
+
334
+ if "tch_avail_pre" not in site_pivot.columns:
335
+ site_pivot["tch_avail_pre"] = pd.NA
336
+ if "tch_avail_post" not in site_pivot.columns:
337
+ site_pivot["tch_avail_post"] = pd.NA
338
+
339
+ site_pivot["tch_avail_diff"] = (
340
+ site_pivot["tch_avail_post"] - site_pivot["tch_avail_pre"]
341
+ )
342
+ site_pivot["pre_ok_vs_sla"] = site_pivot["tch_avail_pre"] >= sla_2g
343
+ site_pivot["post_ok_vs_sla"] = site_pivot["tch_avail_post"] >= sla_2g
344
+
345
+ site_pivot = site_pivot.reset_index()
346
+
347
+ summary_rows = []
348
+ for period_label, col_name in [
349
+ ("pre", "tch_avail_pre"),
350
+ ("post", "tch_avail_post"),
351
+ ]:
352
+ series = site_pivot[col_name].dropna()
353
+ total_cells = series.shape[0]
354
+ if total_cells == 0:
355
+ summary_rows.append(
356
+ {
357
+ "period": period_label,
358
+ "cells": 0,
359
+ "avg_availability": pd.NA,
360
+ "median_availability": pd.NA,
361
+ "p05_availability": pd.NA,
362
+ "p95_availability": pd.NA,
363
+ "min_availability": pd.NA,
364
+ "max_availability": pd.NA,
365
+ "cells_ge_sla": 0,
366
+ "cells_lt_sla": 0,
367
+ "pct_cells_ge_sla": pd.NA,
368
+ }
369
+ )
370
+ continue
371
+ cells_ge_sla = (series >= sla_2g).sum()
372
+ cells_lt_sla = (series < sla_2g).sum()
373
+ summary_rows.append(
374
+ {
375
+ "period": period_label,
376
+ "cells": int(total_cells),
377
+ "avg_availability": series.mean(),
378
+ "median_availability": series.median(),
379
+ "p05_availability": series.quantile(0.05),
380
+ "p95_availability": series.quantile(0.95),
381
+ "min_availability": series.min(),
382
+ "max_availability": series.max(),
383
+ "cells_ge_sla": int(cells_ge_sla),
384
+ "cells_lt_sla": int(cells_lt_sla),
385
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
386
+ }
387
+ )
388
+
389
+ summary_df = pd.DataFrame(summary_rows)
390
+
391
+ return summary_df, site_pivot
392
+
393
+
394
+ def analyze_3g_availability(df: pd.DataFrame, sla_3g: float):
395
+ avail_col = "3g_cell_avail"
396
+
397
+ if avail_col not in df.columns or "period" not in df.columns:
398
+ return None, None
399
+
400
+ df_3g = df[df[avail_col].notna()].copy()
401
+ df_3g = df_3g[df_3g["period"].isin(["pre", "post"])]
402
+
403
+ if df_3g.empty:
404
+ return None, None
405
+
406
+ site_pivot = df_3g.groupby(["code", "period"])[avail_col].mean().unstack()
407
+
408
+ site_pivot = site_pivot.rename(
409
+ columns={"pre": "cell_avail_pre", "post": "cell_avail_post"}
410
+ )
411
+
412
+ if "cell_avail_pre" not in site_pivot.columns:
413
+ site_pivot["cell_avail_pre"] = pd.NA
414
+ if "cell_avail_post" not in site_pivot.columns:
415
+ site_pivot["cell_avail_post"] = pd.NA
416
+
417
+ site_pivot["cell_avail_diff"] = (
418
+ site_pivot["cell_avail_post"] - site_pivot["cell_avail_pre"]
419
+ )
420
+ site_pivot["pre_ok_vs_sla"] = site_pivot["cell_avail_pre"] >= sla_3g
421
+ site_pivot["post_ok_vs_sla"] = site_pivot["cell_avail_post"] >= sla_3g
422
+
423
+ site_pivot = site_pivot.reset_index()
424
+
425
+ summary_rows = []
426
+ for period_label, col_name in [
427
+ ("pre", "cell_avail_pre"),
428
+ ("post", "cell_avail_post"),
429
+ ]:
430
+ series = site_pivot[col_name].dropna()
431
+ total_cells = series.shape[0]
432
+ if total_cells == 0:
433
+ summary_rows.append(
434
+ {
435
+ "period": period_label,
436
+ "cells": 0,
437
+ "avg_availability": pd.NA,
438
+ "median_availability": pd.NA,
439
+ "p05_availability": pd.NA,
440
+ "p95_availability": pd.NA,
441
+ "min_availability": pd.NA,
442
+ "max_availability": pd.NA,
443
+ "cells_ge_sla": 0,
444
+ "cells_lt_sla": 0,
445
+ "pct_cells_ge_sla": pd.NA,
446
+ }
447
+ )
448
+ continue
449
+ cells_ge_sla = (series >= sla_3g).sum()
450
+ cells_lt_sla = (series < sla_3g).sum()
451
+ summary_rows.append(
452
+ {
453
+ "period": period_label,
454
+ "cells": int(total_cells),
455
+ "avg_availability": series.mean(),
456
+ "median_availability": series.median(),
457
+ "p05_availability": series.quantile(0.05),
458
+ "p95_availability": series.quantile(0.95),
459
+ "min_availability": series.min(),
460
+ "max_availability": series.max(),
461
+ "cells_ge_sla": int(cells_ge_sla),
462
+ "cells_lt_sla": int(cells_lt_sla),
463
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
464
+ }
465
+ )
466
+
467
+ summary_df = pd.DataFrame(summary_rows)
468
+
469
+ return summary_df, site_pivot
470
+
471
+
472
+ def analyze_lte_availability(df: pd.DataFrame, sla_lte: float):
473
+ avail_col = "lte_cell_avail"
474
+
475
+ if avail_col not in df.columns or "period" not in df.columns:
476
+ return None, None
477
+
478
+ df_lte = df[df[avail_col].notna()].copy()
479
+ df_lte = df_lte[df_lte["period"].isin(["pre", "post"])]
480
+
481
+ if df_lte.empty:
482
+ return None, None
483
+
484
+ site_pivot = df_lte.groupby(["code", "period"])[avail_col].mean().unstack()
485
+
486
+ site_pivot = site_pivot.rename(
487
+ columns={"pre": "lte_avail_pre", "post": "lte_avail_post"}
488
+ )
489
+
490
+ if "lte_avail_pre" not in site_pivot.columns:
491
+ site_pivot["lte_avail_pre"] = pd.NA
492
+ if "lte_avail_post" not in site_pivot.columns:
493
+ site_pivot["lte_avail_post"] = pd.NA
494
+
495
+ site_pivot["lte_avail_diff"] = (
496
+ site_pivot["lte_avail_post"] - site_pivot["lte_avail_pre"]
497
+ )
498
+ site_pivot["pre_ok_vs_sla"] = site_pivot["lte_avail_pre"] >= sla_lte
499
+ site_pivot["post_ok_vs_sla"] = site_pivot["lte_avail_post"] >= sla_lte
500
+
501
+ site_pivot = site_pivot.reset_index()
502
+
503
+ summary_rows = []
504
+ for period_label, col_name in [
505
+ ("pre", "lte_avail_pre"),
506
+ ("post", "lte_avail_post"),
507
+ ]:
508
+ series = site_pivot[col_name].dropna()
509
+ total_cells = series.shape[0]
510
+ if total_cells == 0:
511
+ summary_rows.append(
512
+ {
513
+ "period": period_label,
514
+ "cells": 0,
515
+ "avg_availability": pd.NA,
516
+ "median_availability": pd.NA,
517
+ "p05_availability": pd.NA,
518
+ "p95_availability": pd.NA,
519
+ "min_availability": pd.NA,
520
+ "max_availability": pd.NA,
521
+ "cells_ge_sla": 0,
522
+ "cells_lt_sla": 0,
523
+ "pct_cells_ge_sla": pd.NA,
524
+ }
525
+ )
526
+ continue
527
+ cells_ge_sla = (series >= sla_lte).sum()
528
+ cells_lt_sla = (series < sla_lte).sum()
529
+ summary_rows.append(
530
+ {
531
+ "period": period_label,
532
+ "cells": int(total_cells),
533
+ "avg_availability": series.mean(),
534
+ "median_availability": series.median(),
535
+ "p05_availability": series.quantile(0.05),
536
+ "p95_availability": series.quantile(0.95),
537
+ "min_availability": series.min(),
538
+ "max_availability": series.max(),
539
+ "cells_ge_sla": int(cells_ge_sla),
540
+ "cells_lt_sla": int(cells_lt_sla),
541
+ "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
542
+ }
543
+ )
544
+
545
+ summary_df = pd.DataFrame(summary_rows)
546
+
547
+ return summary_df, site_pivot
548
+
549
+
550
+ def analyze_multirat_availability(
551
+ df: pd.DataFrame, sla_2g: float, sla_3g: float, sla_lte: float
552
+ ):
553
+ if "period" not in df.columns:
554
+ return None
555
+
556
+ rat_cols = []
557
+ if "2g_tch_avail" in df.columns:
558
+ rat_cols.append("2g_tch_avail")
559
+ if "3g_cell_avail" in df.columns:
560
+ rat_cols.append("3g_cell_avail")
561
+ if "lte_cell_avail" in df.columns:
562
+ rat_cols.append("lte_cell_avail")
563
+
564
+ if not rat_cols:
565
+ return None
566
+
567
+ agg_dict = {col: "mean" for col in rat_cols}
568
+
569
+ df_pre = df[df["period"] == "pre"]
570
+ df_post = df[df["period"] == "post"]
571
+
572
+ pre = df_pre.groupby("code", as_index=False).agg(agg_dict)
573
+ post = df_post.groupby("code", as_index=False).agg(agg_dict)
574
+
575
+ rename_map_pre = {
576
+ "2g_tch_avail": "2g_avail_pre",
577
+ "3g_cell_avail": "3g_avail_pre",
578
+ "lte_cell_avail": "lte_avail_pre",
579
+ }
580
+ rename_map_post = {
581
+ "2g_tch_avail": "2g_avail_post",
582
+ "3g_cell_avail": "3g_avail_post",
583
+ "lte_cell_avail": "lte_avail_post",
584
+ }
585
+
586
+ pre = pre.rename(columns=rename_map_pre)
587
+ post = post.rename(columns=rename_map_post)
588
+
589
+ multi = pd.merge(pre, post, on="code", how="outer")
590
+
591
+ if not df_post.empty and {
592
+ "total_voice_trafic",
593
+ "total_data_trafic",
594
+ }.issubset(df_post.columns):
595
+ post_traffic = (
596
+ df_post.groupby("code", as_index=False)[
597
+ ["total_voice_trafic", "total_data_trafic"]
598
+ ]
599
+ .sum()
600
+ .rename(
601
+ columns={
602
+ "total_voice_trafic": "post_total_voice_trafic",
603
+ "total_data_trafic": "post_total_data_trafic",
604
+ }
605
+ )
606
+ )
607
+ multi = pd.merge(multi, post_traffic, on="code", how="left")
608
+
609
+ if "City" in df.columns:
610
+ city_df = df[["code", "City"]].drop_duplicates("code")
611
+ multi = pd.merge(multi, city_df, on="code", how="left")
612
+
613
+ def _ok_flag(series: pd.Series, sla: float) -> pd.Series:
614
+ if series.name not in multi.columns:
615
+ return pd.Series([pd.NA] * len(multi), index=multi.index)
616
+ ok = multi[series.name] >= sla
617
+ ok = ok.where(multi[series.name].notna(), pd.NA)
618
+ return ok
619
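+ # Note: _ok_flag only uses the passed Series for its .name; the comparison is
+ # re-read from `multi` so rows with missing availability stay <NA> instead of False.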
+
620
+ if "2g_avail_post" in multi.columns:
621
+ multi["ok_2g_post"] = _ok_flag(multi["2g_avail_post"], sla_2g)
622
+ if "3g_avail_post" in multi.columns:
623
+ multi["ok_3g_post"] = _ok_flag(multi["3g_avail_post"], sla_3g)
624
+ if "lte_avail_post" in multi.columns:
625
+ multi["ok_lte_post"] = _ok_flag(multi["lte_avail_post"], sla_lte)
626
+
627
+ def classify_row(row):
628
+ rats_status = []
629
+ for rat, col in [
630
+ ("2G", "ok_2g_post"),
631
+ ("3G", "ok_3g_post"),
632
+ ("LTE", "ok_lte_post"),
633
+ ]:
634
+ if col in row and not pd.isna(row[col]):
635
+ rats_status.append((rat, bool(row[col])))
636
+
637
+ if not rats_status:
638
+ return "No RAT data"
639
+
640
+ bad_rats = [rat for rat, ok in rats_status if not ok]
641
+ if not bad_rats:
642
+ return "OK all RAT"
643
+ if len(bad_rats) == 1:
644
+ return f"Degraded {bad_rats[0]} only"
645
+ return "Degraded multi-RAT (" + ",".join(bad_rats) + ")"
646
+
647
+ multi["post_multirat_status"] = multi.apply(classify_row, axis=1)
648
+
649
+ ordered_cols = ["code"]
650
+ if "City" in multi.columns:
651
+ ordered_cols.append("City")
652
+ for col in [
653
+ "2g_avail_pre",
654
+ "2g_avail_post",
655
+ "3g_avail_pre",
656
+ "3g_avail_post",
657
+ "lte_avail_pre",
658
+ "lte_avail_post",
659
+ "post_total_voice_trafic",
660
+ "post_total_data_trafic",
661
+ "ok_2g_post",
662
+ "ok_3g_post",
663
+ "ok_lte_post",
664
+ "post_multirat_status",
665
+ ]:
666
+ if col in multi.columns:
667
+ ordered_cols.append(col)
668
+
669
+ remaining_cols = [c for c in multi.columns if c not in ordered_cols]
670
+ multi = multi[ordered_cols + remaining_cols]
671
+
672
+ return multi
673
+
674
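+ # Illustration (hypothetical values): with all SLAs at 98.0, a site with
+ # 2g_avail_post = 99.1, 3g_avail_post = 97.2 and lte_avail_post = 96.5 gets
+ # ok_2g_post=True, ok_3g_post=False, ok_lte_post=False and therefore the status
+ # "Degraded multi-RAT (3G,LTE)".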
+
675
+ def analyze_persistent_availability(
676
+ df: pd.DataFrame,
677
+ multi_rat_df: pd.DataFrame,
678
+ sla_2g: float,
679
+ sla_3g: float,
680
+ sla_lte: float,
681
+ min_consecutive_days: int = 3,
682
+ ) -> pd.DataFrame:
683
+ if df is None or df.empty:
684
+ return pd.DataFrame()
685
+ if "date" not in df.columns or "code" not in df.columns:
686
+ return pd.DataFrame()
687
+
688
+ work_df = df.copy()
689
+ work_df["date_only"] = work_df["date"].dt.date
690
+
691
+ site_stats = {}
692
+
693
+ def _update_stats(rat_key_prefix: str, grouped: pd.DataFrame, sla: float) -> None:
694
+ if grouped.empty:
695
+ return
696
+ for code, group in grouped.groupby("code"):
697
+ group = group.sort_values("date_only")
698
+ dates = pd.to_datetime(group["date_only"]).tolist()
699
+ below_flags = (group["value"] < sla).tolist()
700
+ max_streak = 0
701
+ current_streak = 0
702
+ total_below = 0
703
+ last_date = None
704
+ for flag, current_date in zip(below_flags, dates):
705
+ if flag:
706
+ total_below += 1
707
+ if (
708
+ last_date is not None
709
+ and current_date == last_date + timedelta(days=1)
710
+ and current_streak > 0
711
+ ):
712
+ current_streak += 1
713
+ else:
714
+ current_streak = 1
715
+ if current_streak > max_streak:
716
+ max_streak = current_streak
717
+ else:
718
+ current_streak = 0
719
+ last_date = current_date
720
+ stats = site_stats.setdefault(
721
+ code,
722
+ {
723
+ "code": code,
724
+ "max_streak_2g": 0,
725
+ "max_streak_3g": 0,
726
+ "max_streak_lte": 0,
727
+ "below_days_2g": 0,
728
+ "below_days_3g": 0,
729
+ "below_days_lte": 0,
730
+ },
731
+ )
732
+ stats[f"max_streak_{rat_key_prefix}"] = max_streak
733
+ stats[f"below_days_{rat_key_prefix}"] = total_below
734
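+ # Worked example (editor's sketch): daily means for 2024-01-01..05 with below-SLA
+ # flags [True, True, False, True, True] yield total_below = 4 and max_streak = 2;
+ # a below-SLA day after a date gap restarts the streak at 1 instead of extending it.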
+
735
+ for rat_col, rat_key, sla in [
736
+ ("2g_tch_avail", "2g", sla_2g),
737
+ ("3g_cell_avail", "3g", sla_3g),
738
+ ("lte_cell_avail", "lte", sla_lte),
739
+ ]:
740
+ if rat_col in work_df.columns:
741
+ g = (
742
+ work_df.dropna(subset=[rat_col])
743
+ .groupby(["code", "date_only"])[rat_col]
744
+ .mean()
745
+ .reset_index()
746
+ )
747
+ g = g.rename(columns={rat_col: "value"})
748
+ _update_stats(rat_key, g, sla)
749
+
750
+ if not site_stats:
751
+ return pd.DataFrame()
752
+
753
+ rows = []
754
+ for code, s in site_stats.items():
755
+ max_2g = s.get("max_streak_2g", 0)
756
+ max_3g = s.get("max_streak_3g", 0)
757
+ max_lte = s.get("max_streak_lte", 0)
758
+ below_2g = s.get("below_days_2g", 0)
759
+ below_3g = s.get("below_days_3g", 0)
760
+ below_lte = s.get("below_days_lte", 0)
761
+ persistent_2g = max_2g >= min_consecutive_days if max_2g else False
762
+ persistent_3g = max_3g >= min_consecutive_days if max_3g else False
763
+ persistent_lte = max_lte >= min_consecutive_days if max_lte else False
764
+ total_below_any = below_2g + below_3g + below_lte
765
+ persistent_any = persistent_2g or persistent_3g or persistent_lte
766
+ rats_persistent_count = sum(
767
+ [persistent_2g is True, persistent_3g is True, persistent_lte is True]
768
+ )
769
+ rows.append(
770
+ {
771
+ "code": code,
772
+ "persistent_issue_2g": persistent_2g,
773
+ "persistent_issue_3g": persistent_3g,
774
+ "persistent_issue_lte": persistent_lte,
775
+ "max_consecutive_days_2g": max_2g,
776
+ "max_consecutive_days_3g": max_3g,
777
+ "max_consecutive_days_lte": max_lte,
778
+ "total_below_days_2g": below_2g,
779
+ "total_below_days_3g": below_3g,
780
+ "total_below_days_lte": below_lte,
781
+ "total_below_days_any": total_below_any,
782
+ "persistent_issue_any": persistent_any,
783
+ "persistent_rats_count": rats_persistent_count,
784
+ }
785
+ )
786
+
787
+ result = pd.DataFrame(rows)
788
+ result = result[result["persistent_issue_any"] == True]
789
+ if result.empty:
790
+ return result
791
+
792
+ if multi_rat_df is not None and not multi_rat_df.empty:
793
+ cols_to_merge = [
794
+ c
795
+ for c in [
796
+ "code",
797
+ "City",
798
+ "post_total_voice_trafic",
799
+ "post_total_data_trafic",
800
+ "post_multirat_status",
801
+ ]
802
+ if c in multi_rat_df.columns
803
+ ]
804
+ if cols_to_merge:
805
+ result = pd.merge(
806
+ result,
807
+ multi_rat_df[cols_to_merge].drop_duplicates("code"),
808
+ on="code",
809
+ how="left",
810
+ )
811
+
812
+ if "post_total_data_trafic" not in result.columns:
813
+ result["post_total_data_trafic"] = 0.0
814
+
815
+ result["criticity_score"] = (
816
+ result["post_total_data_trafic"].fillna(0) * 1.0
817
+ + result["total_below_days_any"].fillna(0) * 100.0
818
+ + result["persistent_rats_count"].fillna(0) * 1000.0
819
+ )
820
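+ # The weights tier the score: each persistently degraded RAT dominates (x1000),
+ # then total below-SLA days (x100), with post data traffic (x1) as a tie-breaker,
+ # e.g. 2 RATs, 5 bad days and 300 MB give 2000 + 500 + 300 = 2800.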
+
821
+ result = result.sort_values(
822
+ by=["criticity_score", "total_below_days_any"], ascending=[False, False]
823
+ )
824
+
825
+ return result
826
+
827
+
828
+ def monthly_data_analysis(df: pd.DataFrame):
829
+ df["date"] = pd.to_datetime(df["date"])
830
+ df["month_year"] = df["date"].dt.to_period("M").astype(str)
831
+
832
+ voice_trafic = df.pivot_table(
833
+ index="code",
834
+ columns="month_year",
835
+ values="total_voice_trafic",
836
+ aggfunc="sum",
837
+ fill_value=0,
838
+ )
839
+ voice_trafic = voice_trafic.reindex(sorted(voice_trafic.columns), axis=1)
840
+
841
+ data_trafic = df.pivot_table(
842
+ index="code",
843
+ columns="month_year",
844
+ values="total_data_trafic",
845
+ aggfunc="sum",
846
+ fill_value=0,
847
+ )
848
+ data_trafic = data_trafic.reindex(sorted(data_trafic.columns), axis=1)
849
+
850
+ return voice_trafic, data_trafic
851
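+ # Caution: monthly_data_analysis mutates the caller's frame in place ("date" is
+ # converted and a "month_year" column is added, which later surfaces in the Excel
+ # export); pass df.copy() if that side effect is unwanted.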
+
852
+
853
+ # --------------------------------------------------------------------------------------
854
+ # Global state for drill-down views & export
855
+ # --------------------------------------------------------------------------------------
856
+
857
+ current_full_df: pd.DataFrame | None = None
858
+ current_last_period_df: pd.DataFrame | None = None
859
+ current_analysis_df: pd.DataFrame | None = None
860
+ current_analysis_last_period_df: pd.DataFrame | None = None
861
+
862
+ current_multi_rat_df: pd.DataFrame | None = None
863
+ current_persistent_df: pd.DataFrame | None = None
864
+
865
+ current_site_2g_avail: pd.DataFrame | None = None
866
+ current_site_3g_avail: pd.DataFrame | None = None
867
+ current_site_lte_avail: pd.DataFrame | None = None
868
+
869
+ current_summary_2g_avail: pd.DataFrame | None = None
870
+ current_summary_3g_avail: pd.DataFrame | None = None
871
+ current_summary_lte_avail: pd.DataFrame | None = None
872
+
873
+ current_monthly_voice_df: pd.DataFrame | None = None
874
+ current_monthly_data_df: pd.DataFrame | None = None
875
+ current_sum_pre_post_df: pd.DataFrame | None = None
876
+ current_avg_pre_post_df: pd.DataFrame | None = None
877
+ current_availability_summary_all_df: pd.DataFrame | None = None
878
+
879
+ current_export_multi_rat_df: pd.DataFrame | None = None
880
+ current_export_persistent_df: pd.DataFrame | None = None
881
+ current_export_bytes: bytes | None = None
882
+
883
+
884
+ # --------------------------------------------------------------------------------------
885
+ # Widgets
886
+ # --------------------------------------------------------------------------------------
887
+
888
+ PLOTLY_CONFIG = {"displaylogo": False, "scrollZoom": True, "displayModeBar": True}
889
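+ # Shared Plotly config: hide the Plotly logo, enable mouse-wheel zoom and keep the
+ # mode bar visible on every pane below.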
+
890
+ file_2g = pn.widgets.FileInput(name="2G Traffic Report", accept=".csv,.zip")
891
+ file_3g = pn.widgets.FileInput(name="3G Traffic Report", accept=".csv,.zip")
892
+ file_lte = pn.widgets.FileInput(name="LTE Traffic Report", accept=".csv,.zip")
893
+
894
+ pre_range = pn.widgets.DateRangePicker(name="Pre-period (from - to)")
895
+ post_range = pn.widgets.DateRangePicker(name="Post-period (from - to)")
896
+ last_range = pn.widgets.DateRangePicker(name="Last period (from - to)")
897
+
898
+ sla_2g = pn.widgets.FloatInput(name="2G TCH availability SLA (%)", value=98.0, step=0.1)
899
+ sla_3g = pn.widgets.FloatInput(
900
+ name="3G Cell availability SLA (%)", value=98.0, step=0.1
901
+ )
902
+ sla_lte = pn.widgets.FloatInput(
903
+ name="LTE Cell availability SLA (%)", value=98.0, step=0.1
904
+ )
905
+
906
+ number_of_top_trafic_sites = pn.widgets.IntInput(
907
+ name="Number of top traffic sites", value=25
908
+ )
909
+
910
+ min_persistent_days_widget = pn.widgets.IntInput(
911
+ name="Minimum consecutive days below SLA to flag persistent issue",
912
+ value=3,
913
+ )
914
+
915
+ top_critical_n_widget = pn.widgets.IntInput(
916
+ name="Number of top critical sites to display", value=25
917
+ )
918
+
919
+ run_button = pn.widgets.Button(name="Run analysis", button_type="primary")
920
+
921
+ status_pane = pn.pane.Alert(
922
+ "Upload the 3 reports, select the 3 periods and click 'Run analysis'",
923
+ alert_type="primary",
924
+ )
925
+
926
+ summary_table = pn.widgets.Tabulator(
927
+ height=250,
928
+ sizing_mode="stretch_width",
929
+ layout="fit_data_table",
930
+ )
931
+
932
+ sum_pre_post_table = pn.widgets.Tabulator(
933
+ height=250,
934
+ sizing_mode="stretch_width",
935
+ layout="fit_data_table",
936
+ )
937
+ summary_2g_table = pn.widgets.Tabulator(
938
+ height=250,
939
+ sizing_mode="stretch_width",
940
+ layout="fit_data_table",
941
+ )
942
+ worst_2g_table = pn.widgets.Tabulator(
943
+ height=250,
944
+ sizing_mode="stretch_width",
945
+ layout="fit_data_table",
946
+ )
947
+ summary_3g_table = pn.widgets.Tabulator(
948
+ height=250,
949
+ sizing_mode="stretch_width",
950
+ layout="fit_data_table",
951
+ )
952
+ worst_3g_table = pn.widgets.Tabulator(
953
+ height=250,
954
+ sizing_mode="stretch_width",
955
+ layout="fit_data_table",
956
+ )
957
+ summary_lte_table = pn.widgets.Tabulator(
958
+ height=250,
959
+ sizing_mode="stretch_width",
960
+ layout="fit_data_table",
961
+ )
962
+ worst_lte_table = pn.widgets.Tabulator(
963
+ height=250,
964
+ sizing_mode="stretch_width",
965
+ layout="fit_data_table",
966
+ )
967
+ multi_rat_table = pn.widgets.Tabulator(
968
+ height=250,
969
+ sizing_mode="stretch_width",
970
+ layout="fit_data_table",
971
+ )
972
+ persistent_table = pn.widgets.Tabulator(
973
+ height=250,
974
+ sizing_mode="stretch_width",
975
+ layout="fit_data_table",
976
+ )
977
+
978
+ site_select = pn.widgets.AutocompleteInput(
979
+ name="Select a site for detailed view (Type to search)",
980
+ options={},
981
+ case_sensitive=False,
982
+ search_strategy="includes",
983
+ restrict=True,
984
+ placeholder="Type site code or city...",
985
+ )
986
+ site_traffic_plot_pane = pn.pane.Plotly(
987
+ sizing_mode="stretch_both",
988
+ config=PLOTLY_CONFIG,
989
+ css_classes=["fullscreen-target-site-traffic"],
990
+ )
991
+ site_traffic_plot = pn.Column(
992
+ site_traffic_plot_pane,
993
+ height=400,
994
+ sizing_mode="stretch_width",
995
+ css_classes=["plot-fullscreen-wrapper", "site-traffic-wrapper"],
996
+ )
997
+ site_avail_plot_pane = pn.pane.Plotly(
998
+ sizing_mode="stretch_both",
999
+ config=PLOTLY_CONFIG,
1000
+ css_classes=["fullscreen-target-site-avail"],
1001
+ )
1002
+ site_avail_plot = pn.Column(
1003
+ site_avail_plot_pane,
1004
+ height=400,
1005
+ sizing_mode="stretch_width",
1006
+ css_classes=["plot-fullscreen-wrapper", "site-avail-wrapper"],
1007
+ )
1008
+ site_degraded_table = pn.widgets.Tabulator(
1009
+ height=200,
1010
+ sizing_mode="stretch_width",
1011
+ layout="fit_data_table",
1012
+ )
1013
+
1014
+ city_select = pn.widgets.AutocompleteInput(
1015
+ name="Select a City for aggregated view (Type to search)",
1016
+ options=[],
1017
+ case_sensitive=False,
1018
+ search_strategy="includes",
1019
+ restrict=True,
1020
+ placeholder="Type city name...",
1021
+ )
1022
+ city_traffic_plot_pane = pn.pane.Plotly(
1023
+ sizing_mode="stretch_both",
1024
+ config=PLOTLY_CONFIG,
1025
+ css_classes=["fullscreen-target-city-traffic"],
1026
+ )
1027
+ city_traffic_plot = pn.Column(
1028
+ city_traffic_plot_pane,
1029
+ height=400,
1030
+ sizing_mode="stretch_width",
1031
+ css_classes=["plot-fullscreen-wrapper", "city-traffic-wrapper"],
1032
+ )
1033
+ city_avail_plot_pane = pn.pane.Plotly(
1034
+ sizing_mode="stretch_both",
1035
+ config=PLOTLY_CONFIG,
1036
+ css_classes=["fullscreen-target-city-avail"],
1037
+ )
1038
+ city_avail_plot = pn.Column(
1039
+ city_avail_plot_pane,
1040
+ height=400,
1041
+ sizing_mode="stretch_width",
1042
+ css_classes=["plot-fullscreen-wrapper", "city-avail-wrapper"],
1043
+ )
1044
+ city_degraded_table = pn.widgets.Tabulator(
1045
+ height=200,
1046
+ sizing_mode="stretch_width",
1047
+ layout="fit_data_table",
1048
+ )
1049
+
1050
+ daily_avail_plot_pane = pn.pane.Plotly(
1051
+ sizing_mode="stretch_both",
1052
+ config=PLOTLY_CONFIG,
1053
+ css_classes=["fullscreen-target-daily-avail"],
1054
+ )
1055
+ daily_avail_plot = pn.Column(
1056
+ daily_avail_plot_pane,
1057
+ height=400,
1058
+ sizing_mode="stretch_width",
1059
+ css_classes=["plot-fullscreen-wrapper", "daily-avail-wrapper"],
1060
+ )
1061
+ daily_degraded_table = pn.widgets.Tabulator(
1062
+ height=200,
1063
+ sizing_mode="stretch_width",
1064
+ layout="fit_data_table",
1065
+ )
1066
+
1067
+ top_data_sites_table = pn.widgets.Tabulator(
1068
+ height=250,
1069
+ sizing_mode="stretch_width",
1070
+ layout="fit_data_table",
1071
+ )
1072
+ top_voice_sites_table = pn.widgets.Tabulator(
1073
+ height=250,
1074
+ sizing_mode="stretch_width",
1075
+ layout="fit_data_table",
1076
+ )
1077
+ top_data_bar_plot_pane = pn.pane.Plotly(
1078
+ sizing_mode="stretch_both",
1079
+ config=PLOTLY_CONFIG,
1080
+ css_classes=["fullscreen-target-top-data"],
1081
+ )
1082
+ top_data_bar_plot = pn.Column(
1083
+ top_data_bar_plot_pane,
1084
+ height=400,
1085
+ sizing_mode="stretch_width",
1086
+ css_classes=["plot-fullscreen-wrapper", "top-data-bar-wrapper"],
1087
+ )
1088
+ top_voice_bar_plot_pane = pn.pane.Plotly(
1089
+ sizing_mode="stretch_both",
1090
+ config=PLOTLY_CONFIG,
1091
+ css_classes=["fullscreen-target-top-voice"],
1092
+ )
1093
+ top_voice_bar_plot = pn.Column(
1094
+ top_voice_bar_plot_pane,
1095
+ height=400,
1096
+ sizing_mode="stretch_width",
1097
+ css_classes=["plot-fullscreen-wrapper", "top-voice-bar-wrapper"],
1098
+ )
1099
+ data_map_plot_pane = pn.pane.Plotly(
1100
+ sizing_mode="stretch_both",
1101
+ config=PLOTLY_CONFIG,
1102
+ css_classes=["fullscreen-target-data-map"],
1103
+ )
1104
+ data_map_plot = pn.Column(
1105
+ data_map_plot_pane,
1106
+ height=500,
1107
+ sizing_mode="stretch_width",
1108
+ css_classes=["plot-fullscreen-wrapper", "data-map-wrapper"],
1109
+ )
1110
+ voice_map_plot_pane = pn.pane.Plotly(
1111
+ sizing_mode="stretch_both",
1112
+ config=PLOTLY_CONFIG,
1113
+ css_classes=["fullscreen-target-voice-map"],
1114
+ )
1115
+ voice_map_plot = pn.Column(
1116
+ voice_map_plot_pane,
1117
+ height=500,
1118
+ sizing_mode="stretch_width",
1119
+ css_classes=["plot-fullscreen-wrapper", "voice-map-wrapper"],
1120
+ )
1121
+
1122
+ # Fullscreen helper logic has been replaced by client-side JS.
1123
+
1124
+ # Fullscreen buttons for each Plotly plot
1125
+ site_traffic_fullscreen_btn = pn.widgets.Button(
1126
+ name="Full screen site traffic", button_type="default"
1127
+ )
1128
+ site_avail_fullscreen_btn = pn.widgets.Button(
1129
+ name="Full screen site availability", button_type="default"
1130
+ )
1131
+ city_traffic_fullscreen_btn = pn.widgets.Button(
1132
+ name="Full screen city traffic", button_type="default"
1133
+ )
1134
+ city_avail_fullscreen_btn = pn.widgets.Button(
1135
+ name="Full screen city availability", button_type="default"
1136
+ )
1137
+ daily_avail_fullscreen_btn = pn.widgets.Button(
1138
+ name="Full screen daily availability", button_type="default"
1139
+ )
1140
+ top_data_fullscreen_btn = pn.widgets.Button(
1141
+ name="Full screen top data bar", button_type="default"
1142
+ )
1143
+ top_voice_fullscreen_btn = pn.widgets.Button(
1144
+ name="Full screen top voice bar", button_type="default"
1145
+ )
1146
+ data_map_fullscreen_btn = pn.widgets.Button(
1147
+ name="Full screen data map", button_type="default"
1148
+ )
1149
+ voice_map_fullscreen_btn = pn.widgets.Button(
1150
+ name="Full screen voice map", button_type="default"
1151
+ )
1152
+
1153
+ multi_rat_download = pn.widgets.FileDownload(
1154
+ label="Download Multi-RAT table (CSV)",
1155
+ filename="multi_rat_availability.csv",
1156
+ button_type="default",
1157
+ )
1158
+
1159
+ persistent_download = pn.widgets.FileDownload(
1160
+ label="Download persistent issues (CSV)",
1161
+ filename="persistent_issues.csv",
1162
+ button_type="default",
1163
+ )
1164
+
1165
+ top_data_download = pn.widgets.FileDownload(
1166
+ label="Download top data sites (CSV)",
1167
+ filename="top_data_sites.csv",
1168
+ button_type="default",
1169
+ )
1170
+
1171
+ top_voice_download = pn.widgets.FileDownload(
1172
+ label="Download top voice sites (CSV)",
1173
+ filename="top_voice_sites.csv",
1174
+ button_type="default",
1175
+ )
1176
+
1177
+ export_button = pn.widgets.FileDownload(
1178
+ label="Download the Analysis Report",
1179
+ filename="Global_Trafic_Analysis_Report.xlsx",
1180
+ button_type="primary",
1181
+ )
1182
+
1183
+
1184
+ # --------------------------------------------------------------------------------------
1185
+ # Callback
1186
+ # --------------------------------------------------------------------------------------
1187
+
1188
+
1189
+ def _validate_date_range(rng: tuple[date, date] | list[date], label: str) -> None:
1190
+ if not rng or len(rng) != 2:
1191
+ raise ValueError(f"Please select 2 dates for {label}.")
1192
+ if rng[0] is None or rng[1] is None:
1193
+ raise ValueError(f"Please select valid dates for {label}.")
1194
+
1195
+
1196
+ def run_analysis(event=None): # event param required by on_click
1197
+ try:
1198
+ status_pane.object = "Running analysis..."
1199
+ status_pane.alert_type = "primary"
1200
+
1201
+ global current_full_df, current_last_period_df
1202
+ global current_analysis_df, current_analysis_last_period_df
1203
+ global current_multi_rat_df, current_persistent_df
1204
+ global current_site_2g_avail, current_site_3g_avail, current_site_lte_avail
1205
+ global \
1206
+ current_summary_2g_avail, \
1207
+ current_summary_3g_avail, \
1208
+ current_summary_lte_avail
1209
+ global current_monthly_voice_df, current_monthly_data_df
1210
+ global current_sum_pre_post_df, current_avg_pre_post_df
1211
+ global current_availability_summary_all_df
1212
+ global current_export_multi_rat_df, current_export_persistent_df
1213
+ global current_export_bytes
1214
+
1215
+ # Basic validations
1216
+ if not (file_2g.value and file_3g.value and file_lte.value):
1217
+ raise ValueError("Please upload all 3 traffic reports (2G, 3G, LTE).")
1218
+
1219
+ _validate_date_range(pre_range.value, "pre-period")
1220
+ _validate_date_range(post_range.value, "post-period")
1221
+ _validate_date_range(last_range.value, "last period")
1222
+
1223
+ # Light overlap check (same spirit as the Streamlit version): only identical or fully nested pre/post periods are rejected; a stricter variant is sketched below.
1224
+ pre_start, pre_end = pre_range.value
1225
+ post_start, post_end = post_range.value
1226
+ if pre_start == post_start and pre_end == post_end:
1227
+ raise ValueError("Pre and post periods are the same.")
1228
+ if pre_start < post_start and pre_end > post_end:
1229
+ raise ValueError("Pre and post periods are overlapping.")
1230
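+ # A stricter guard (sketch, not the original Streamlit behaviour) would reject
+ # any overlap at all:
+ # if pre_start <= post_end and post_start <= pre_end:
+ #     raise ValueError("Pre and post periods are overlapping.")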
+
1231
+ df_2g = read_fileinput_to_df(file_2g)
1232
+ df_3g = read_fileinput_to_df(file_3g)
1233
+ df_lte = read_fileinput_to_df(file_lte)
1234
+
1235
+ if df_2g is None or df_3g is None or df_lte is None:
1236
+ raise ValueError("Failed to read one or more input files.")
1237
+
1238
+ summary = pd.DataFrame(
1239
+ {
1240
+ "Dataset": ["2G", "3G", "LTE"],
1241
+ "Rows": [len(df_2g), len(df_3g), len(df_lte)],
1242
+ "Columns": [df_2g.shape[1], df_3g.shape[1], df_lte.shape[1]],
1243
+ }
1244
+ )
1245
+ summary_table.value = summary
1246
+
1247
+ df_2g_clean = preprocess_2g(df_2g)
1248
+ df_3g_clean = preprocess_3g(df_3g)
1249
+ df_lte_clean = preprocess_lte(df_lte)
1250
+
1251
+ full_df, last_period, sum_pre_post_analysis, avg_pre_post_analysis = (
1252
+ merge_and_compare(
1253
+ df_2g_clean,
1254
+ df_3g_clean,
1255
+ df_lte_clean,
1256
+ pre_range.value,
1257
+ post_range.value,
1258
+ last_range.value,
1259
+ )
1260
+ )
1261
+
1262
+ monthly_voice_df, monthly_data_df = monthly_data_analysis(full_df)
1263
+
1264
+ analysis_df = full_df
1265
+
1266
+ # Persist global state for later drill-down / export
1267
+ current_full_df = full_df
1268
+ current_last_period_df = last_period
1269
+ current_analysis_df = analysis_df
1270
+ current_analysis_last_period_df = last_period
1271
+ current_monthly_voice_df = monthly_voice_df
1272
+ current_monthly_data_df = monthly_data_df
1273
+ current_sum_pre_post_df = sum_pre_post_analysis
1274
+ current_avg_pre_post_df = avg_pre_post_analysis
1275
+
1276
+ sum_pre_post_table.value = sum_pre_post_analysis
1277
+
1278
+ summary_2g_avail, site_2g_avail = analyze_2g_availability(
1279
+ analysis_df, float(sla_2g.value)
1280
+ )
1281
+ if summary_2g_avail is not None:
1282
+ summary_2g_table.value = summary_2g_avail.round(2)
1283
+ worst_sites_2g = site_2g_avail.sort_values("tch_avail_post").head(25)
1284
+ worst_2g_table.value = worst_sites_2g.round(2)
1285
+ else:
1286
+ summary_2g_table.value = pd.DataFrame()
1287
+ worst_2g_table.value = pd.DataFrame()
1288
+
1289
+ current_summary_2g_avail = summary_2g_avail
1290
+ current_site_2g_avail = site_2g_avail if summary_2g_avail is not None else None
1291
+
1292
+ summary_3g_avail, site_3g_avail = analyze_3g_availability(
1293
+ analysis_df, float(sla_3g.value)
1294
+ )
1295
+ if summary_3g_avail is not None:
1296
+ summary_3g_table.value = summary_3g_avail.round(2)
1297
+ worst_sites_3g = site_3g_avail.sort_values("cell_avail_post").head(25)
1298
+ worst_3g_table.value = worst_sites_3g.round(2)
1299
+ else:
1300
+ summary_3g_table.value = pd.DataFrame()
1301
+ worst_3g_table.value = pd.DataFrame()
1302
+
1303
+ current_summary_3g_avail = summary_3g_avail
1304
+ current_site_3g_avail = site_3g_avail if summary_3g_avail is not None else None
1305
+
1306
+ summary_lte_avail, site_lte_avail = analyze_lte_availability(
1307
+ analysis_df, float(sla_lte.value)
1308
+ )
1309
+ if summary_lte_avail is not None:
1310
+ summary_lte_table.value = summary_lte_avail.round(2)
1311
+ worst_sites_lte = site_lte_avail.sort_values("lte_avail_post").head(25)
1312
+ worst_lte_table.value = worst_sites_lte.round(2)
1313
+ else:
1314
+ summary_lte_table.value = pd.DataFrame()
1315
+ worst_lte_table.value = pd.DataFrame()
1316
+
1317
+ current_summary_lte_avail = summary_lte_avail
1318
+ current_site_lte_avail = (
1319
+ site_lte_avail if summary_lte_avail is not None else None
1320
+ )
1321
+
1322
+ # Build availability summary across RATs for export
1323
+ availability_frames = []
1324
+ if summary_2g_avail is not None:
1325
+ tmp = summary_2g_avail.copy()
1326
+ tmp["RAT"] = "2G"
1327
+ availability_frames.append(tmp)
1328
+ if summary_3g_avail is not None:
1329
+ tmp = summary_3g_avail.copy()
1330
+ tmp["RAT"] = "3G"
1331
+ availability_frames.append(tmp)
1332
+ if summary_lte_avail is not None:
1333
+ tmp = summary_lte_avail.copy()
1334
+ tmp["RAT"] = "LTE"
1335
+ availability_frames.append(tmp)
1336
+
1337
+ current_availability_summary_all_df = (
1338
+ pd.concat(availability_frames, ignore_index=True)
1339
+ if availability_frames
1340
+ else pd.DataFrame()
1341
+ )
1342
+
1343
+ multi_rat_df = analyze_multirat_availability(
1344
+ analysis_df,
1345
+ float(sla_2g.value),
1346
+ float(sla_3g.value),
1347
+ float(sla_lte.value),
1348
+ )
1349
+ if multi_rat_df is not None:
1350
+ multi_rat_table.value = multi_rat_df.round(2)
1351
+ else:
1352
+ multi_rat_table.value = pd.DataFrame()
1353
+
1354
+ current_multi_rat_df = multi_rat_df if multi_rat_df is not None else None
1355
+
1356
+ # Persistent availability (UI uses configurable threshold, export keeps 3 days)
1357
+ persistent_df = pd.DataFrame()
1358
+ if multi_rat_df is not None:
1359
+ persistent_df = analyze_persistent_availability(
1360
+ analysis_df,
1361
+ multi_rat_df,
1362
+ float(sla_2g.value),
1363
+ float(sla_3g.value),
1364
+ float(sla_lte.value),
1365
+ int(min_persistent_days_widget.value),
1366
+ )
1367
+
1368
+ current_persistent_df = (
1369
+ persistent_df
1370
+ if persistent_df is not None and not persistent_df.empty
1371
+ else None
1372
+ )
1373
+
1374
+ # Export-specific multi-RAT & persistent (based on full_df as in Streamlit app)
1375
+ export_multi_rat_base = analyze_multirat_availability(
1376
+ full_df,
1377
+ float(sla_2g.value),
1378
+ float(sla_3g.value),
1379
+ float(sla_lte.value),
1380
+ )
1381
+ current_export_multi_rat_df = (
1382
+ export_multi_rat_base
1383
+ if export_multi_rat_base is not None
1384
+ else pd.DataFrame()
1385
+ )
1386
+
1387
+ export_persistent_tmp = pd.DataFrame()
1388
+ if export_multi_rat_base is not None:
1389
+ export_persistent_tmp = analyze_persistent_availability(
1390
+ full_df,
1391
+ export_multi_rat_base,
1392
+ float(sla_2g.value),
1393
+ float(sla_3g.value),
1394
+ float(sla_lte.value),
1395
+ 3,
1396
+ )
1397
+ current_export_persistent_df = (
1398
+ export_persistent_tmp
1399
+ if export_persistent_tmp is not None and not export_persistent_tmp.empty
1400
+ else pd.DataFrame()
1401
+ )
1402
+
1403
+ # Precompute export bytes so the download button is instant
1404
+ current_export_bytes = _build_export_bytes()
1405
+
1406
+ # Update export filename with timestamp for clarity
1407
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1408
+ export_button.filename = f"Global_Trafic_Analysis_Report_{timestamp}.xlsx"
1409
+
1410
+ # Update all drill-down & map views
1411
+ _update_site_controls()
1412
+ _update_city_controls()
1413
+ _update_daily_availability_view()
1414
+ _update_top_sites_and_maps()
1415
+ _update_persistent_table_view()
1416
+
1417
+ status_pane.alert_type = "success"
1418
+ status_pane.object = "Analysis completed."
1419
+
1420
+ except Exception as exc: # noqa: BLE001
1421
+ status_pane.alert_type = "danger"
1422
+ status_pane.object = f"Error: {exc}"
1423
+
1424
+
1425
+ run_button.on_click(run_analysis)
1426
+
1427
+
1428
+ def _update_site_controls() -> None:
1429
+ """Populate site selection widget based on current_analysis_df and refresh view."""
1430
+ if current_analysis_df is None or current_analysis_df.empty:
1431
+ site_select.options = {}
1432
+ site_select.value = None
1433
+ site_traffic_plot_pane.object = None
1434
+ site_avail_plot_pane.object = None
1435
+ site_degraded_table.value = pd.DataFrame()
1436
+ return
1437
+
1438
+ sites_df = (
1439
+ current_analysis_df[["code", "City"]]
1440
+ .drop_duplicates()
1441
+ .sort_values(by=["City", "code"])
1442
+ )
1443
+
1444
+ options: dict[str, int] = {}
1445
+ for _, row in sites_df.iterrows():
1446
+ label = (
1447
+ f"{row['City']}_{row['code']}"
1448
+ if pd.notna(row["City"])
1449
+ else str(row["code"])
1450
+ )
1451
+ options[label] = int(row["code"])
1452
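+ # Example (hypothetical row): City "Rabat" with code 1021 yields the entry
+ # {"Rabat_1021": 1021}; the widget displays the label and returns the int code.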
+
1453
+ site_select.options = options
1454
+ if options and site_select.value not in options.values():
1455
+ # When options is a dict, the AutocompleteInput value is the mapped value (code)
1456
+ site_select.value = next(iter(options.values()))
1457
+
1458
+ _update_site_view()
1459
+
1460
+
1461
+ def _update_site_view(event=None) -> None: # noqa: D401, ARG001
1462
+ """Update site drill-down plots and table from current_analysis_df and site_select."""
1463
+ if current_analysis_df is None or current_analysis_df.empty:
1464
+ site_traffic_plot_pane.object = None
1465
+ site_avail_plot_pane.object = None
1466
+ site_degraded_table.value = pd.DataFrame()
1467
+ return
1468
+
1469
+ selected_code = site_select.value
1470
+ if selected_code is None:
1471
+ site_traffic_plot_pane.object = None
1472
+ site_avail_plot_pane.object = None
1473
+ site_degraded_table.value = pd.DataFrame()
1474
+ return
1475
+
1476
+ site_detail_df = current_analysis_df[
1477
+ current_analysis_df["code"] == int(selected_code)
1478
+ ].copy()
1479
+ if site_detail_df.empty:
1480
+ site_traffic_plot_pane.object = None
1481
+ site_avail_plot_pane.object = None
1482
+ site_degraded_table.value = pd.DataFrame()
1483
+ return
1484
+
1485
+ site_detail_df = site_detail_df.sort_values("date")
1486
+
1487
+ # Traffic over time
1488
+ traffic_cols = [
1489
+ col
1490
+ for col in ["total_voice_trafic", "total_data_trafic"]
1491
+ if col in site_detail_df.columns
1492
+ ]
1493
+ first_row = site_detail_df.iloc[0]
1494
+ site_label = f"{first_row['code']}"
1495
+ if pd.notna(first_row.get("City")):
1496
+ site_label += f" ({first_row['City']})"
1497
+
1498
+ if traffic_cols:
1499
+ traffic_long = site_detail_df[["date"] + traffic_cols].melt(
1500
+ id_vars="date",
1501
+ value_vars=traffic_cols,
1502
+ var_name="metric",
1503
+ value_name="value",
1504
+ )
1505
+ fig_traffic = px.line(
1506
+ traffic_long,
1507
+ x="date",
1508
+ y="value",
1509
+ color="metric",
1510
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1511
+ )
1512
+ fig_traffic.update_layout(
1513
+ title=f"Traffic Evolution - Site: {site_label}",
1514
+ template="plotly_white",
1515
+ plot_bgcolor="white",
1516
+ paper_bgcolor="white",
1517
+ )
1518
+ site_traffic_plot_pane.object = fig_traffic
1519
+ else:
1520
+ site_traffic_plot_pane.object = None
1521
+
1522
+ # Availability over time per RAT
1523
+ avail_cols: list[str] = []
1524
+ rename_map: dict[str, str] = {}
1525
+ if "2g_tch_avail" in site_detail_df.columns:
1526
+ avail_cols.append("2g_tch_avail")
1527
+ rename_map["2g_tch_avail"] = "2G"
1528
+ if "3g_cell_avail" in site_detail_df.columns:
1529
+ avail_cols.append("3g_cell_avail")
1530
+ rename_map["3g_cell_avail"] = "3G"
1531
+ if "lte_cell_avail" in site_detail_df.columns:
1532
+ avail_cols.append("lte_cell_avail")
1533
+ rename_map["lte_cell_avail"] = "LTE"
1534
+
1535
+ if avail_cols:
1536
+ avail_df = site_detail_df[["date"] + avail_cols].copy()
1537
+ avail_df = avail_df.rename(columns=rename_map)
1538
+ value_cols = [c for c in avail_df.columns if c != "date"]
1539
+ avail_long = avail_df.melt(
1540
+ id_vars="date",
1541
+ value_vars=value_cols,
1542
+ var_name="RAT",
1543
+ value_name="availability",
1544
+ )
1545
+ fig_avail = px.line(
1546
+ avail_long,
1547
+ x="date",
1548
+ y="availability",
1549
+ color="RAT",
1550
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1551
+ )
1552
+ fig_avail.update_layout(
1553
+ title=f"Availability vs SLA - Site: {site_label}",
1554
+ template="plotly_white",
1555
+ plot_bgcolor="white",
1556
+ paper_bgcolor="white",
1557
+ )
1558
+ site_avail_plot_pane.object = fig_avail
1559
+
1560
+ # Days with availability below SLA per RAT
1561
+ site_detail_df["date_only"] = site_detail_df["date"].dt.date
1562
+ degraded_rows_site: list[dict] = []
1563
+ for rat_col, rat_name, sla_value in [
1564
+ ("2g_tch_avail", "2G", float(sla_2g.value)),
1565
+ ("3g_cell_avail", "3G", float(sla_3g.value)),
1566
+ ("lte_cell_avail", "LTE", float(sla_lte.value)),
1567
+ ]:
1568
+ if rat_col in site_detail_df.columns:
1569
+ daily_site = (
1570
+ site_detail_df.groupby("date_only")[rat_col].mean().dropna()
1571
+ )
1572
+ mask = daily_site < sla_value
1573
+ for d, val in daily_site[mask].items():
1574
+ degraded_rows_site.append(
1575
+ {
1576
+ "RAT": rat_name,
1577
+ "date": d,
1578
+ "avg_availability": val,
1579
+ "SLA": sla_value,
1580
+ }
1581
+ )
1582
+ if degraded_rows_site:
1583
+ degraded_site_df = pd.DataFrame(degraded_rows_site)
1584
+ site_degraded_table.value = degraded_site_df.round(2)
1585
+ else:
1586
+ site_degraded_table.value = pd.DataFrame()
1587
+ else:
1588
+ site_avail_plot_pane.object = None
1589
+ site_degraded_table.value = pd.DataFrame()
1590
+
1591
+
1592
+ def _update_city_controls() -> None:
1593
+ """Populate city selection widget based on current_analysis_df and refresh view."""
1594
+ if current_analysis_df is None or current_analysis_df.empty:
1595
+ city_select.options = []
1596
+ city_select.value = None
1597
+ city_traffic_plot_pane.object = None
1598
+ city_avail_plot_pane.object = None
1599
+ city_degraded_table.value = pd.DataFrame()
1600
+ return
1601
+
1602
+ if (
1603
+ "City" not in current_analysis_df.columns
1604
+ or not current_analysis_df["City"].notna().any()
1605
+ ):
1606
+ city_select.options = []
1607
+ city_select.value = None
1608
+ city_traffic_plot_pane.object = None
1609
+ city_avail_plot_pane.object = None  # Plotly pane expects a figure or None, not a DataFrame
1610
+ city_degraded_table.value = pd.DataFrame()
1611
+ return
1612
+
1613
+ cities_df = (
1614
+ current_analysis_df[["City"]].dropna().drop_duplicates().sort_values(by="City")
1615
+ )
1616
+ options = cities_df["City"].tolist()
1617
+ city_select.options = options
1618
+ if options and city_select.value not in options:
1619
+ city_select.value = options[0]
1620
+
1621
+ _update_city_view()
1622
+
1623
+
1624
+ def _update_city_view(event=None) -> None: # noqa: D401, ARG001
1625
+ """Update city drill-down plots and degraded days table based on city_select."""
1626
+ if current_analysis_df is None or current_analysis_df.empty:
1627
+ city_traffic_plot_pane.object = None
1628
+ city_avail_plot_pane.object = None
1629
+ city_degraded_table.value = pd.DataFrame()
1630
+ return
1631
+
1632
+ selected_city = city_select.value
1633
+ if not selected_city:
1634
+ city_traffic_plot_pane.object = None
1635
+ city_avail_plot_pane.object = None
1636
+ city_degraded_table.value = pd.DataFrame()
1637
+ return
1638
+
1639
+ city_detail_df = current_analysis_df[
1640
+ current_analysis_df["City"] == selected_city
1641
+ ].copy()
1642
+ if city_detail_df.empty:
1643
+ city_traffic_plot_pane.object = None
1644
+ city_avail_plot_pane.object = None
1645
+ city_degraded_table.value = pd.DataFrame()
1646
+ return
1647
+
1648
+ city_detail_df = city_detail_df.sort_values("date")
1649
+
1650
+ # Traffic aggregated at city level
1651
+ traffic_cols_city = [
1652
+ col
1653
+ for col in ["total_voice_trafic", "total_data_trafic"]
1654
+ if col in city_detail_df.columns
1655
+ ]
1656
+ if traffic_cols_city:
1657
+ city_traffic = (
1658
+ city_detail_df.groupby("date")[traffic_cols_city].sum().reset_index()
1659
+ )
1660
+ traffic_long_city = city_traffic.melt(
1661
+ id_vars="date",
1662
+ value_vars=traffic_cols_city,
1663
+ var_name="metric",
1664
+ value_name="value",
1665
+ )
1666
+ fig_traffic_city = px.line(
1667
+ traffic_long_city,
1668
+ x="date",
1669
+ y="value",
1670
+ color="metric",
1671
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1672
+ )
1673
+ fig_traffic_city.update_layout(
1674
+ title=f"Total Traffic Evolution - City: {selected_city}",
1675
+ template="plotly_white",
1676
+ plot_bgcolor="white",
1677
+ paper_bgcolor="white",
1678
+ )
1679
+ city_traffic_plot_pane.object = fig_traffic_city
1680
+ else:
1681
+ city_traffic_plot_pane.object = None
1682
+
1683
+ # Availability aggregated at city level
1684
+ avail_cols_city: list[str] = []
1685
+ rename_map_city: dict[str, str] = {}
1686
+ if "2g_tch_avail" in city_detail_df.columns:
1687
+ avail_cols_city.append("2g_tch_avail")
1688
+ rename_map_city["2g_tch_avail"] = "2G"
1689
+ if "3g_cell_avail" in city_detail_df.columns:
1690
+ avail_cols_city.append("3g_cell_avail")
1691
+ rename_map_city["3g_cell_avail"] = "3G"
1692
+ if "lte_cell_avail" in city_detail_df.columns:
1693
+ avail_cols_city.append("lte_cell_avail")
1694
+ rename_map_city["lte_cell_avail"] = "LTE"
1695
+
1696
+ if avail_cols_city:
1697
+ avail_city_df = city_detail_df[["date"] + avail_cols_city].copy()
1698
+ avail_city_df = avail_city_df.rename(columns=rename_map_city)
1699
+ value_cols_city = [c for c in avail_city_df.columns if c != "date"]
1700
+ avail_long_city = avail_city_df.melt(
1701
+ id_vars="date",
1702
+ value_vars=value_cols_city,
1703
+ var_name="RAT",
1704
+ value_name="availability",
1705
+ )
1706
+ fig_avail_city = px.line(
1707
+ avail_long_city,
1708
+ x="date",
1709
+ y="availability",
1710
+ color="RAT",
1711
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1712
+ )
1713
+ fig_avail_city.update_layout(
1714
+ title=f"Availability vs SLA - City: {selected_city}",
1715
+ template="plotly_white",
1716
+ plot_bgcolor="white",
1717
+ paper_bgcolor="white",
1718
+ )
1719
+ city_avail_plot_pane.object = fig_avail_city
1720
+
1721
+ city_detail_df["date_only"] = city_detail_df["date"].dt.date
1722
+ degraded_rows_city: list[dict] = []
1723
+ for rat_col, rat_name, sla_value in [
1724
+ ("2g_tch_avail", "2G", float(sla_2g.value)),
1725
+ ("3g_cell_avail", "3G", float(sla_3g.value)),
1726
+ ("lte_cell_avail", "LTE", float(sla_lte.value)),
1727
+ ]:
1728
+ if rat_col in city_detail_df.columns:
1729
+ daily_city = (
1730
+ city_detail_df.groupby("date_only")[rat_col].mean().dropna()
1731
+ )
1732
+ mask_city = daily_city < sla_value
1733
+ for d, val in daily_city[mask_city].items():
1734
+ degraded_rows_city.append(
1735
+ {
1736
+ "RAT": rat_name,
1737
+ "date": d,
1738
+ "avg_availability": val,
1739
+ "SLA": sla_value,
1740
+ }
1741
+ )
1742
+ if degraded_rows_city:
1743
+ degraded_city_df = pd.DataFrame(degraded_rows_city)
1744
+ city_degraded_table.value = degraded_city_df.round(2)
1745
+ else:
1746
+ city_degraded_table.value = pd.DataFrame()
1747
+ else:
1748
+ city_avail_plot_pane.object = None
1749
+ city_degraded_table.value = pd.DataFrame()
1750
+
1751
+
1752
+ def _update_daily_availability_view() -> None:
1753
+ """Daily average availability per RAT over the full analysis_df."""
1754
+ if current_analysis_df is None or current_analysis_df.empty:
1755
+ daily_avail_plot_pane.object = None
1756
+ daily_degraded_table.value = pd.DataFrame()
1757
+ return
1758
+
1759
+ temp_df = current_analysis_df.copy()
1760
+ if not any(
1761
+ col in temp_df.columns
1762
+ for col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]
1763
+ ):
1764
+ daily_avail_plot_pane.object = None
1765
+ daily_degraded_table.value = pd.DataFrame()
1766
+ return
1767
+
1768
+ temp_df["date_only"] = temp_df["date"].dt.date
1769
+
1770
+ agg_dict: dict[str, str] = {}
1771
+ if "2g_tch_avail" in temp_df.columns:
1772
+ agg_dict["2g_tch_avail"] = "mean"
1773
+ if "3g_cell_avail" in temp_df.columns:
1774
+ agg_dict["3g_cell_avail"] = "mean"
1775
+ if "lte_cell_avail" in temp_df.columns:
1776
+ agg_dict["lte_cell_avail"] = "mean"
1777
+
1778
+ daily_avail = (
1779
+ temp_df.groupby("date_only", as_index=False).agg(agg_dict)
1780
+ if agg_dict
1781
+ else pd.DataFrame()
1782
+ )
1783
+
1784
+ if daily_avail.empty:
1785
+ daily_avail_plot_pane.object = None
1786
+ daily_degraded_table.value = pd.DataFrame()
1787
+ return
1788
+
1789
+ rename_map: dict[str, str] = {}
1790
+ if "2g_tch_avail" in daily_avail.columns:
1791
+ rename_map["2g_tch_avail"] = "2G"
1792
+ if "3g_cell_avail" in daily_avail.columns:
1793
+ rename_map["3g_cell_avail"] = "3G"
1794
+ if "lte_cell_avail" in daily_avail.columns:
1795
+ rename_map["lte_cell_avail"] = "LTE"
1796
+
1797
+ daily_avail = daily_avail.rename(columns=rename_map)
1798
+
1799
+ value_cols = [c for c in daily_avail.columns if c != "date_only"]
1800
+ if not value_cols:
1801
+ daily_avail_plot_pane.object = None
1802
+ daily_degraded_table.value = pd.DataFrame()
1803
+ return
1804
+
1805
+ daily_melt = daily_avail.melt(
1806
+ id_vars="date_only",
1807
+ value_vars=value_cols,
1808
+ var_name="RAT",
1809
+ value_name="availability",
1810
+ )
1811
+
1812
+ fig = px.line(
1813
+ daily_melt,
1814
+ x="date_only",
1815
+ y="availability",
1816
+ color="RAT",
1817
+ markers=True,
1818
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1819
+ )
1820
+ fig.update_layout(
1821
+ template="plotly_white",
1822
+ plot_bgcolor="white",
1823
+ paper_bgcolor="white",
1824
+ )
1825
+ daily_avail_plot_pane.object = fig
1826
+
1827
+ degraded_rows: list[dict] = []
1828
+ for rat_name, sla_value in [
1829
+ ("2G", float(sla_2g.value)),
1830
+ ("3G", float(sla_3g.value)),
1831
+ ("LTE", float(sla_lte.value)),
1832
+ ]:
1833
+ if rat_name in daily_avail.columns:
1834
+ series = daily_avail[rat_name]
1835
+ mask = series < sla_value
1836
+ for d, val in zip(daily_avail.loc[mask, "date_only"], series[mask]):
1837
+ degraded_rows.append(
1838
+ {
1839
+ "RAT": rat_name,
1840
+ "date": d,
1841
+ "avg_availability": val,
1842
+ "SLA": sla_value,
1843
+ }
1844
+ )
1845
+
1846
+ if degraded_rows:
1847
+ degraded_df = pd.DataFrame(degraded_rows)
1848
+ daily_degraded_table.value = degraded_df.round(2)
1849
+ else:
1850
+ daily_degraded_table.value = pd.DataFrame()
1851
+
1852
+
1853
+ def _update_top_sites_and_maps() -> None:
1854
+ """Top traffic sites and geographic maps based on last analysis period."""
1855
+ if current_analysis_last_period_df is None or current_analysis_last_period_df.empty:
1856
+ top_data_sites_table.value = pd.DataFrame()
1857
+ top_voice_sites_table.value = pd.DataFrame()
1858
+ top_data_bar_plot_pane.object = None
1859
+ top_voice_bar_plot_pane.object = None
1860
+ data_map_plot_pane.object = None
1861
+ voice_map_plot_pane.object = None
1862
+ return
1863
+
1864
+ df = current_analysis_last_period_df
1865
+ n = int(number_of_top_trafic_sites.value or 25)
1866
+
1867
+ # Top sites by data traffic
1868
+ top_sites = (
1869
+ df.groupby(["code", "City"])["total_data_trafic"]
1870
+ .sum()
1871
+ .sort_values(ascending=False)
1872
+ .head(n)
1873
+ )
1874
+ top_data_sites_table.value = top_sites.sort_values(ascending=True).reset_index()
1875
+
1876
+ fig_data = px.bar(
1877
+ top_sites.reset_index(),
1878
+ y=top_sites.reset_index()[["City", "code"]].agg(
1879
+ lambda x: "_".join(map(str, x)), axis=1
1880
+ ),
1881
+ x="total_data_trafic",
1882
+ title=f"Top {n} sites by data traffic",
1883
+ orientation="h",
1884
+ text="total_data_trafic",
1885
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1886
+ )
1887
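+ # The y labels are built as "City_code" (e.g. "Rabat_1021") so bars stay unique
+ # when several sites share a city; the voice chart below reuses the same scheme.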
+ fig_data.update_layout(
1888
+ template="plotly_white",
1889
+ plot_bgcolor="white",
1890
+ paper_bgcolor="white",
1891
+ )
1892
+ top_data_bar_plot_pane.object = fig_data
1893
+
1894
+ # Top sites by voice traffic
1895
+ top_sites_voice = (
1896
+ df.groupby(["code", "City"])["total_voice_trafic"]
1897
+ .sum()
1898
+ .sort_values(ascending=False)
1899
+ .head(n)
1900
+ )
1901
+ top_voice_sites_table.value = top_sites_voice.sort_values(
1902
+ ascending=True
1903
+ ).reset_index()
1904
+
1905
+ fig_voice = px.bar(
1906
+ top_sites_voice.reset_index(),
1907
+ y=top_sites_voice.reset_index()[["City", "code"]].agg(
1908
+ lambda x: "_".join(map(str, x)), axis=1
1909
+ ),
1910
+ x="total_voice_trafic",
1911
+ title=f"Top {n} sites by voice traffic",
1912
+ orientation="h",
1913
+ text="total_voice_trafic",
1914
+ color_discrete_sequence=px.colors.qualitative.Plotly,
1915
+ )
1916
+ fig_voice.update_layout(
1917
+ template="plotly_white",
1918
+ plot_bgcolor="white",
1919
+ paper_bgcolor="white",
1920
+ )
1921
+ top_voice_bar_plot_pane.object = fig_voice
1922
+
1923
+ # Maps
1924
+ if {"Latitude", "Longitude"}.issubset(df.columns):
1925
+ min_size = 5
1926
+ max_size = 40
1927
+
1928
+ # Data traffic map
1929
+ df_data = (
1930
+ df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
1931
+ .sum()
1932
+ .reset_index()
1933
+ )
1934
+ if not df_data.empty:
1935
+ traffic_data_min = df_data["total_data_trafic"].min()
1936
+ traffic_data_max = df_data["total_data_trafic"].max()
1937
+ if traffic_data_max > traffic_data_min:
1938
+ df_data["bubble_size"] = df_data["total_data_trafic"].apply(
1939
+ lambda x: min_size
1940
+ + (max_size - min_size)
1941
+ * (x - traffic_data_min)
1942
+ / (traffic_data_max - traffic_data_min)
1943
+ )
1944
+ else:
1945
+ df_data["bubble_size"] = min_size
1946
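+ # Linear min-max mapping into [min_size, max_size]: the busiest site gets a 40 px
+ # bubble, the quietest 5 px; if all sites carry equal traffic everything falls
+ # back to min_size.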
+
1947
+ custom_blue_red = [
1948
+ [0.0, "#4292c6"],
1949
+ [0.2, "#2171b5"],
1950
+ [0.4, "#084594"],
1951
+ [0.6, "#cb181d"],
1952
+ [0.8, "#a50f15"],
1953
+ [1.0, "#67000d"],
1954
+ ]
1955
+
1956
+ fig_map_data = px.scatter_map(
1957
+ df_data,
1958
+ lat="Latitude",
1959
+ lon="Longitude",
1960
+ color="total_data_trafic",
1961
+ size="bubble_size",
1962
+ color_continuous_scale=custom_blue_red,
1963
+ size_max=max_size,
1964
+ zoom=10,
1965
+ height=600,
1966
+ title="Data traffic distribution",
1967
+ hover_data={"code": True, "total_data_trafic": True},
1968
+ hover_name="code",
1969
+ text=[str(x) for x in df_data["code"]],
1970
+ )
1971
+ fig_map_data.update_layout(
1972
+ mapbox_style="open-street-map",
1973
+ coloraxis_colorbar=dict(title="Total Data Traffic (MB)"),
1974
+ coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max),
1975
+ font=dict(size=10, color="black"),
1976
+ )
1977
+ data_map_plot_pane.object = fig_map_data
1978
+ else:
1979
+ data_map_plot_pane.object = None
1980
+
1981
+ # Voice traffic map
1982
+ df_voice = (
1983
+ df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
1984
+ .sum()
1985
+ .reset_index()
1986
+ )
1987
+ if not df_voice.empty:
1988
+ traffic_voice_min = df_voice["total_voice_trafic"].min()
1989
+ traffic_voice_max = df_voice["total_voice_trafic"].max()
1990
+ if traffic_voice_max > traffic_voice_min:
1991
+ df_voice["bubble_size"] = df_voice["total_voice_trafic"].apply(
1992
+ lambda x: min_size
1993
+ + (max_size - min_size)
1994
+ * (x - traffic_voice_min)
1995
+ / (traffic_voice_max - traffic_voice_min)
1996
+ )
1997
+ else:
1998
+ df_voice["bubble_size"] = min_size
1999
+
2000
+ custom_blue_red = [
2001
+ [0.0, "#4292c6"],
2002
+ [0.2, "#2171b5"],
2003
+ [0.4, "#084594"],
2004
+ [0.6, "#cb181d"],
2005
+ [0.8, "#a50f15"],
2006
+ [1.0, "#67000d"],
2007
+ ]
2008
+
2009
+ fig_map_voice = px.scatter_map(
2010
+ df_voice,
2011
+ lat="Latitude",
2012
+ lon="Longitude",
2013
+ color="total_voice_trafic",
2014
+ size="bubble_size",
2015
+ color_continuous_scale=custom_blue_red,
2016
+ size_max=max_size,
2017
+ zoom=10,
2018
+ height=600,
2019
+ title="Voice traffic distribution",
2020
+ hover_data={"code": True, "total_voice_trafic": True},
2021
+ hover_name="code",
2022
+ text=[str(x) for x in df_voice["code"]],
2023
+ )
2024
+ fig_map_voice.update_layout(
2025
+ mapbox_style="open-street-map",
2026
+ coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"),
2027
+ coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max),
2028
+ font=dict(size=10, color="black"),
2029
+ )
2030
+ voice_map_plot_pane.object = fig_map_voice
2031
+ else:
2032
+ voice_map_plot_pane.object = None
2033
+ else:
2034
+ data_map_plot_pane.object = None
2035
+ voice_map_plot_pane.object = None
2036
+
2037
+
2038
+ def _update_persistent_table_view(event=None) -> None: # noqa: D401, ARG001
2039
+ """Update persistent issues table based on current_persistent_df and top_critical_n."""
2040
+ if current_persistent_df is None or current_persistent_df.empty:
2041
+ persistent_table.value = pd.DataFrame()
2042
+ return
2043
+
2044
+ n = int(top_critical_n_widget.value or 25)
2045
+ persistent_table.value = current_persistent_df.head(n).round(2)
2046
+
2047
+
2048
+ def _recompute_persistent_from_widget(event=None) -> None: # noqa: ARG001
2049
+ """Recompute persistent issues when the minimum consecutive days widget changes."""
2050
+ global current_persistent_df
2051
+
2052
+ if (
2053
+ current_analysis_df is None
2054
+ or current_analysis_df.empty
2055
+ or current_multi_rat_df is None
2056
+ or current_multi_rat_df.empty
2057
+ ):
2058
+ current_persistent_df = None
2059
+ persistent_table.value = pd.DataFrame()
2060
+ return
2061
+
2062
+ persistent_df = analyze_persistent_availability(
2063
+ current_analysis_df,
2064
+ current_multi_rat_df,
2065
+ float(sla_2g.value),
2066
+ float(sla_3g.value),
2067
+ float(sla_lte.value),
2068
+ int(min_persistent_days_widget.value),
2069
+ )
2070
+
2071
+ current_persistent_df = (
2072
+ persistent_df if persistent_df is not None and not persistent_df.empty else None
2073
+ )
2074
+ _update_persistent_table_view()
2075
+
2076
+
2077
+ def _build_input_parameters_df() -> pd.DataFrame:
2078
+ """Build DataFrame with input parameters used for the report."""
2079
+ params = []
2080
+ if file_2g.filename:
2081
+ params.append({"Parameter": "2G Report File", "Value": file_2g.filename})
2082
+ if file_3g.filename:
2083
+ params.append({"Parameter": "3G Report File", "Value": file_3g.filename})
2084
+ if file_lte.filename:
2085
+ params.append({"Parameter": "LTE Report File", "Value": file_lte.filename})
2086
+ if pre_range.value and len(pre_range.value) == 2:
2087
+ params.append({"Parameter": "Pre-Period Start", "Value": pre_range.value[0]})
2088
+ params.append({"Parameter": "Pre-Period End", "Value": pre_range.value[1]})
2089
+ if post_range.value and len(post_range.value) == 2:
2090
+ params.append({"Parameter": "Post-Period Start", "Value": post_range.value[0]})
2091
+ params.append({"Parameter": "Post-Period End", "Value": post_range.value[1]})
2092
+ if last_range.value and len(last_range.value) == 2:
2093
+ params.append({"Parameter": "Last Period Start", "Value": last_range.value[0]})
2094
+ params.append({"Parameter": "Last Period End", "Value": last_range.value[1]})
2095
+ params.append({"Parameter": "2G TCH Availability SLA (%)", "Value": sla_2g.value})
2096
+ params.append({"Parameter": "3G Cell Availability SLA (%)", "Value": sla_3g.value})
2097
+ params.append(
2098
+ {"Parameter": "LTE Cell Availability SLA (%)", "Value": sla_lte.value}
2099
+ )
2100
+ params.append(
2101
+ {
2102
+ "Parameter": "Number of Top Traffic Sites",
2103
+ "Value": number_of_top_trafic_sites.value,
2104
+ }
2105
+ )
2106
+ params.append(
2107
+ {
2108
+ "Parameter": "Number of Top Critical Sites",
2109
+ "Value": top_critical_n_widget.value,
2110
+ }
2111
+ )
2112
+ params.append(
2113
+ {
2114
+ "Parameter": "Minimum Consecutive Days Below SLA",
2115
+ "Value": min_persistent_days_widget.value,
2116
+ }
2117
+ )
2118
+ params.append({"Parameter": "Export Timestamp", "Value": datetime.now()})
2119
+ return pd.DataFrame(params)
2120
+
2121
+
2122
+ def _build_export_bytes() -> bytes:
2123
+ """Build Excel report bytes mirroring Streamlit export structure."""
2124
+ if current_full_df is None:
2125
+ return b""
2126
+
2127
+ dfs: list[pd.DataFrame] = [
2128
+ _build_input_parameters_df(),
2129
+ current_full_df,
2130
+ (
2131
+ current_sum_pre_post_df
2132
+ if current_sum_pre_post_df is not None
2133
+ else pd.DataFrame()
2134
+ ),
2135
+ (
2136
+ current_avg_pre_post_df
2137
+ if current_avg_pre_post_df is not None
2138
+ else pd.DataFrame()
2139
+ ),
2140
+ (
2141
+ current_monthly_voice_df
2142
+ if current_monthly_voice_df is not None
2143
+ else pd.DataFrame()
2144
+ ),
2145
+ (
2146
+ current_monthly_data_df
2147
+ if current_monthly_data_df is not None
2148
+ else pd.DataFrame()
2149
+ ),
2150
+ (
2151
+ current_availability_summary_all_df
2152
+ if current_availability_summary_all_df is not None
2153
+ else pd.DataFrame()
2154
+ ),
2155
+ current_site_2g_avail if current_site_2g_avail is not None else pd.DataFrame(),
2156
+ current_site_3g_avail if current_site_3g_avail is not None else pd.DataFrame(),
2157
+ (
2158
+ current_site_lte_avail
2159
+ if current_site_lte_avail is not None
2160
+ else pd.DataFrame()
2161
+ ),
2162
+ (
2163
+ current_export_multi_rat_df
2164
+ if current_export_multi_rat_df is not None
2165
+ else pd.DataFrame()
2166
+ ),
2167
+ (
2168
+ current_export_persistent_df
2169
+ if current_export_persistent_df is not None
2170
+ else pd.DataFrame()
2171
+ ),
2172
+ ]
2173
+
2174
+ sheet_names = [
2175
+ "Input_Parameters",
2176
+ "Global_Trafic_Analysis",
2177
+ "Sum_pre_post_analysis",
2178
+ "Avg_pre_post_analysis",
2179
+ "Monthly_voice_analysis",
2180
+ "Monthly_data_analysis",
2181
+ "Availability_Summary_All_RAT",
2182
+ "TwoG_Availability_By_Site",
2183
+ "ThreeG_Availability_By_Site",
2184
+ "LTE_Availability_By_Site",
2185
+ "MultiRAT_Availability_By_Site",
2186
+ "Top_Critical_Sites",
2187
+ ]
2188
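+ # dfs and sheet_names are positionally paired (12 entries each); keep both lists
+ # in sync when adding or removing a sheet.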
+
2189
+ return write_dfs_to_excel(dfs, sheet_names, index=True)
2190
+
2191
+
2192
+ def _export_callback() -> io.BytesIO:
2193
+ # Use cached bytes from the last completed analysis to make download instant
2194
+ data = current_export_bytes or b""
2195
+ if not data:
2196
+ return io.BytesIO()
2197
+ # FileDownload expects a file path or file-like object, not raw bytes
2198
+ return io.BytesIO(data)
2199
+
2200
+
2201
+ def _df_to_csv_bytes(df: pd.DataFrame | None) -> io.BytesIO:
2202
+ if df is None or getattr(df, "empty", True): # handles None and empty DataFrame
2203
+ return io.BytesIO()
2204
+ return io.BytesIO(df.to_csv(index=False).encode("utf-8"))
2205
+
2206
+
2207
+ def _download_multi_rat_table() -> io.BytesIO:
2208
+ value = getattr(multi_rat_table, "value", None)
2209
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2210
+
2211
+
2212
+ def _download_persistent_table() -> io.BytesIO:
2213
+ value = getattr(persistent_table, "value", None)
2214
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2215
+
2216
+
2217
+ def _download_top_data_sites() -> io.BytesIO:
2218
+ value = getattr(top_data_sites_table, "value", None)
2219
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2220
+
2221
+
2222
+ def _download_top_voice_sites() -> io.BytesIO:
2223
+ value = getattr(top_voice_sites_table, "value", None)
2224
+ return _df_to_csv_bytes(value if isinstance(value, pd.DataFrame) else None)
2225
+
2226
+
2227
+ # Client-side fullscreen JS logic with Shadow DOM support.
2227
+ # We target the specific CSS class assigned to each plot pane.
2228
2230
+ _JS_FULLSCREEN = """
2231
+ function findDeep(root, cls) {
2232
+ if (!root) return null;
2233
+ if (root.classList && root.classList.contains(cls)) return root;
2234
+
2235
+ if (root.shadowRoot) {
2236
+ var found = findDeep(root.shadowRoot, cls);
2237
+ if (found) return found;
2238
+ }
2239
+
2240
+ var children = root.children;
2241
+ if (children) {
2242
+ for (var i = 0; i < children.length; i++) {
2243
+ var found = findDeep(children[i], cls);
2244
+ if (found) return found;
2245
+ }
2246
+ }
2247
+ return null;
2248
+ }
2249
+
2250
+ var el = findDeep(document.body, target_class);
2251
+
2252
+ if (el) {
2253
+ if (el.requestFullscreen) {
2254
+ el.requestFullscreen();
2255
+ } else if (el.webkitRequestFullscreen) {
2256
+ el.webkitRequestFullscreen();
2257
+ } else if (el.msRequestFullscreen) {
2258
+ el.msRequestFullscreen();
2259
+ }
2260
+ } else {
2261
+ // Debug info
2262
+ alert("Impossible de passer en plein écran : élément '" + target_class + "' introuvable même après recherche approfondie (Shadow DOM).");
2263
+ }
2264
+ """
2265
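+ # Each fullscreen button passes its wrapper class through js_on_click(
+ # args={"target_class": ...}), so one snippet serves every plot; findDeep also
+ # descends into shadowRoots because Panel/Bokeh render components in shadow DOM.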
+
2266
+
2267
+ # Reactive bindings for drill-down controls & export
2268
+ site_select.param.watch(_update_site_view, "value")
2269
+ city_select.param.watch(_update_city_view, "value")
2270
+ top_critical_n_widget.param.watch(_update_persistent_table_view, "value")
2271
+ number_of_top_trafic_sites.param.watch(_update_top_sites_and_maps, "value")
2272
+ min_persistent_days_widget.param.watch(_recompute_persistent_from_widget, "value")
2273
+
2274
+ export_button.callback = _export_callback
2275
+ multi_rat_download.callback = _download_multi_rat_table
2276
+ persistent_download.callback = _download_persistent_table
2277
+ top_data_download.callback = _download_top_data_sites
2278
+ top_voice_download.callback = _download_top_voice_sites
2279
+
2280
+ site_traffic_fullscreen_btn.js_on_click(
2281
+ args={"target_class": "site-traffic-wrapper"},
2282
+ code=_JS_FULLSCREEN,
2283
+ )
2284
+ site_avail_fullscreen_btn.js_on_click(
2285
+ args={"target_class": "site-avail-wrapper"},
2286
+ code=_JS_FULLSCREEN,
2287
+ )
2288
+ city_traffic_fullscreen_btn.js_on_click(
2289
+ args={"target_class": "city-traffic-wrapper"},
2290
+ code=_JS_FULLSCREEN,
2291
+ )
2292
+ city_avail_fullscreen_btn.js_on_click(
2293
+ args={"target_class": "city-avail-wrapper"},
2294
+ code=_JS_FULLSCREEN,
2295
+ )
2296
+ daily_avail_fullscreen_btn.js_on_click(
2297
+ args={"target_class": "daily-avail-wrapper"},
2298
+ code=_JS_FULLSCREEN,
2299
+ )
2300
+ top_data_fullscreen_btn.js_on_click(
2301
+ args={"target_class": "top-data-bar-wrapper"},
2302
+ code=_JS_FULLSCREEN,
2303
+ )
2304
+ top_voice_fullscreen_btn.js_on_click(
2305
+ args={
2306
+ "target_class": "top-voice-bar-wrapper",
2307
+ },
2308
+ code=_JS_FULLSCREEN,
2309
+ )
2310
+ data_map_fullscreen_btn.js_on_click(
2311
+ args={"target_class": "data-map-wrapper"},
2312
+ code=_JS_FULLSCREEN,
2313
+ )
2314
+ voice_map_fullscreen_btn.js_on_click(
2315
+ args={"target_class": "voice-map-wrapper"},
2316
+ code=_JS_FULLSCREEN,
2317
+ )
2318
+
2319
+
2320
+ # --------------------------------------------------------------------------------------
2321
+ # Material Template layout
2322
+ # --------------------------------------------------------------------------------------
2323
+
2324
+
2325
+ template = pn.template.MaterialTemplate(
2326
+ title="📊 Global Trafic Analysis - Panel (2G / 3G / LTE)",
2327
+ )
2328
+
2329
+ # Modal CSS override removed: charts now use the browser's native fullscreen API.
2331
+
2332
+ sidebar_content = pn.Column(
2333
+ """This Panel app is a migration of the existing Streamlit-based global traffic analysis.
2334
+
2335
+ Upload the 3 traffic reports (2G / 3G / LTE), configure the analysis periods and SLAs, then run the analysis.
2336
+
2337
+ In this first step, the app only validates the pipeline and shows a lightweight summary of the inputs. Full KPIs and visualizations will be added progressively.""",
2338
+ "---",
2339
+ file_2g,
2340
+ file_3g,
2341
+ file_lte,
2342
+ "---",
2343
+ pre_range,
2344
+ post_range,
2345
+ last_range,
2346
+ "---",
2347
+ sla_2g,
2348
+ sla_3g,
2349
+ sla_lte,
2350
+ "---",
2351
+ number_of_top_trafic_sites,
2352
+ min_persistent_days_widget,
2353
+ top_critical_n_widget,
2354
+ "---",
2355
+ run_button,
2356
+ )
2357
+
2358
+ main_content = pn.Column(
2359
+ status_pane,
2360
+ pn.pane.Markdown("## Input datasets summary"),
2361
+ summary_table,
2362
+ pn.layout.Divider(),
2363
+ pn.pane.Markdown("## Summary Analysis Pre / Post"),
2364
+ sum_pre_post_table,
2365
+ pn.layout.Divider(),
2366
+ pn.pane.Markdown("## Availability vs SLA (per RAT)"),
2367
+ pn.Tabs(
2368
+ (
2369
+ "2G",
2370
+ pn.Column(
2371
+ summary_2g_table, pn.pane.Markdown("Worst 25 sites"), worst_2g_table
2372
+ ),
2373
+ ),
2374
+ (
2375
+ "3G",
2376
+ pn.Column(
2377
+ summary_3g_table, pn.pane.Markdown("Worst 25 sites"), worst_3g_table
2378
+ ),
2379
+ ),
2380
+ (
2381
+ "LTE",
2382
+ pn.Column(
2383
+ summary_lte_table, pn.pane.Markdown("Worst 25 sites"), worst_lte_table
2384
+ ),
2385
+ ),
2386
+ ),
2387
+ pn.layout.Divider(),
2388
+ pn.pane.Markdown("## Multi-RAT Availability (post-period)"),
2389
+ multi_rat_table,
2390
+ multi_rat_download,
2391
+ pn.layout.Divider(),
2392
+ pn.pane.Markdown("## Persistent availability issues (critical sites)"),
2393
+ persistent_table,
2394
+ persistent_download,
2395
+ pn.layout.Divider(),
2396
+ pn.pane.Markdown("## Site drill-down: traffic and availability over time"),
2397
+ site_select,
2398
+ site_traffic_plot,
2399
+ site_traffic_fullscreen_btn,
2400
+ site_avail_plot,
2401
+ site_avail_fullscreen_btn,
2402
+ site_degraded_table,
2403
+ pn.layout.Divider(),
2404
+ pn.pane.Markdown("## City drill-down: traffic and availability over time"),
2405
+ city_select,
2406
+ city_traffic_plot,
2407
+ city_traffic_fullscreen_btn,
2408
+ city_avail_plot,
2409
+ city_avail_fullscreen_btn,
2410
+ city_degraded_table,
2411
+ pn.layout.Divider(),
2412
+ pn.pane.Markdown("## Daily average availability per RAT"),
2413
+ daily_avail_plot,
2414
+ daily_avail_fullscreen_btn,
2415
+ daily_degraded_table,
2416
+ pn.layout.Divider(),
2417
+ pn.pane.Markdown("## Top traffic sites and geographic maps (last period)"),
2418
+ pn.Row(
2419
+ pn.Column(
2420
+ pn.pane.Markdown("### Top sites by data traffic"),
2421
+ top_data_sites_table,
2422
+ top_data_download,
2423
+ top_data_bar_plot,
2424
+ top_data_fullscreen_btn,
2425
+ ),
2426
+ pn.Column(
2427
+ pn.pane.Markdown("### Top sites by voice traffic"),
2428
+ top_voice_sites_table,
2429
+ top_voice_download,
2430
+ top_voice_bar_plot,
2431
+ top_voice_fullscreen_btn,
2432
+ ),
2433
+ ),
2434
+ pn.Row(
2435
+ pn.Column(
2436
+ pn.pane.Markdown("### Data traffic map"),
2437
+ data_map_plot,
2438
+ data_map_fullscreen_btn,
2439
+ ),
2440
+ pn.Column(
2441
+ pn.pane.Markdown("### Voice traffic map"),
2442
+ voice_map_plot,
2443
+ voice_map_fullscreen_btn,
2444
+ ),
2445
+ ),
2446
+ pn.layout.Divider(),
2447
+ pn.pane.Markdown("## Export"),
2448
+ export_button,
2449
+ )
2450
+
2451
+
2452
+ def get_page_components():
2453
+ return sidebar_content, main_content
2454
+
2455
+
2456
+ if __name__ == "__main__":
2457
+ template.sidebar.append(sidebar_content)
2458
+ template.main.append(main_content)
2459
+ template.servable()
physical_db/physical_database.csv ADDED
The diff for this file is too large to render.
process_kpi/__init__.py ADDED
File without changes
process_kpi/gsm_kpi_requirements.md ADDED
@@ -0,0 +1,47 @@
1
+ # Required Input
2
+
3
+ - BH report
4
+ - Daily Report
5
+ - Dump file (2G dump)
6
+ - Number of last days for the analysis
7
+ - Number of days for blocking
8
+ - SDCCH blocking threshold
9
+ - TCH blocking threshold
10
+ - Availability threshold
11
+ - TCH abis fails threshold
12
+
13
+ Analysis
14
+
15
+ DUMP
16
+
17
+ - Check that mandatory sheet exists in the dump
18
+ - Parse 2G databases
19
+ - Get number of TRX, TCH, SDCCH, amrSegLoadDepTchRateLower, amrSegLoadDepTchRateUpper from the databases
20
+ - Add "GPRS" column equal to (dedicatedGPRScapacity * number_tch_per_cell) / 100
21
+ - Get "Coef HF rate" by mapping "amrSegLoadDepTchRateLower" to the 2G analysis_utility "hf_rate_coef" dict
22
+ - "TCH Actual HR%" equal to "number of TCH" multiplied by "Coef HF rate"
23
+ - Get "Offered Traffic" by mapping the approximate "TCH Actual HR%" to the 2G analysis_utility "erlangB" dict
24
+
25
+ BH DATA
26
+
27
+ - Pivot KPI in BH report
28
+ - Calculate Average and Max of Traffic
29
+ - Average of TCH blocking
30
+ - Average of SDCCH blocking
31
+ - Count number of Days with TCH blocking exceeding the TCH blocking threshold
32
+ - Count number of Days with SDCCH blocking exceeding the SDCCH blocking threshold
33
+ - Count number of Days with Availability below Availability threshold
34
+ - "TCH UTILIZATION (@Max Traffic)" equal to "Max_Trafic" divided by "offered Traffic"
35
+ - Add "ErlabngB_value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
36
+ - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
37
+ - "Target HR CHs" equal to "Target FR CHs" * 2
38
+ - Get "Signal" and "GPRS" value from databases
39
+ - Target TCHs equal to Target HR CHs + Signal + GPRS + SDCCH
40
+ - "Target TRXs" equal to roundup(Target TCHs/8)
41
+ - "# of required TRXs" equal to difference between "Target TRXs" and "number of TRX"
42
+
43
+ Daily DATA
44
+
45
+ - Pivot KPI in Daily Report
46
+ - Count number of Days with Availability below Availability threshold
47
+ - Count number of Days with Abis fails exceeding the TCH Abis fails threshold
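A minimal, runnable sketch of the BH DATA dimensioning chain described in this file. The `erlangB` table here is an illustrative stand-in; the real lookup dicts live in the 2G analysis utilities.

```python
import math

# Hypothetical stand-in for the 2G analysis_utility "erlangB" dict.
erlangB = {5: 1.36, 10: 4.46, 15: 8.11, 20: 12.03}  # TCH count -> offered Erlangs

def required_trxs(max_traffic, max_tch_blocking, signal, gprs, sdcch, current_trx):
    # ErlangB_value = Max Traffic / (1 - (Max TCH call blocking / 200))
    erlang_value = max_traffic / (1 - max_tch_blocking / 200)
    # Target FR CHs: smallest channel count whose offered traffic covers the value
    target_fr = min((ch for ch, erl in erlangB.items() if erl >= erlang_value),
                    default=max(erlangB))
    target_hr = target_fr * 2                        # "Target HR CHs"
    target_tch = target_hr + signal + gprs + sdcch   # "Target TCHs"
    target_trx = math.ceil(target_tch / 8)           # "Target TRXs" = roundup(TCHs / 8)
    return target_trx - current_trx                  # "# of required TRXs"

print(required_trxs(3.0, 2.0, signal=1, gprs=1.5, sdcch=2, current_trx=2))  # 2
```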
process_kpi/kpi_health_check/__init__.py ADDED
File without changes
process_kpi/kpi_health_check/benchmarks.py ADDED
@@ -0,0 +1,42 @@
1
+ import pandas as pd
3
+
4
+ def calculate_sla_metrics(
5
+ df: pd.DataFrame,
6
+ kpi: str,
7
+ rules_df: pd.DataFrame | None = None
8
+ ) -> dict:
9
+ """
10
+ Calculates simple metrics for the given KPI trace:
11
+ - SLA value (if exists)
12
+ - Median (recent window)
13
+
14
+ Returns a dict with: 'sla': float|None, 'median': float|None
15
+ """
16
+ res = {"sla": None, "median": None}
17
+
18
+ if df is None or df.empty or kpi not in df.columns:
19
+ return res
20
+
21
+ # 1. Get SLA from rules
22
+ if rules_df is not None and not rules_df.empty:
23
+ # Assuming rules_df has 'KPI' and 'sla' columns
24
+ # RAT matching is assumed to be handled by the caller before this lookup.
25
+ # Here we do a simplistic lookup.
26
+ try:
27
+ row = rules_df[rules_df["KPI"] == kpi]
28
+ if not row.empty:
29
+ val = row.iloc[0].get("sla")
30
+ res["sla"] = float(val) if pd.notna(val) else None
31
+ except Exception:
32
+ pass
33
+
34
+ # 2. Calculate Median (entire passed df, usually it's the recent window)
35
+ try:
36
+ vals = pd.to_numeric(df[kpi], errors="coerce").dropna()
37
+ if not vals.empty:
38
+ res["median"] = float(vals.median())
39
+ except Exception:
40
+ pass
41
+
42
+ return res
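Usage sketch for `calculate_sla_metrics`, assuming the repo root is on PYTHONPATH (KPI name and values are illustrative):

```python
import pandas as pd
from process_kpi.kpi_health_check.benchmarks import calculate_sla_metrics

df = pd.DataFrame({"CSSR": [97.9, 98.1, 98.4, None]})   # None is dropped before the median
rules = pd.DataFrame([{"KPI": "CSSR", "sla": 98.0}])

print(calculate_sla_metrics(df, "CSSR", rules))
# {'sla': 98.0, 'median': 98.1}
```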
process_kpi/kpi_health_check/engine.py ADDED
@@ -0,0 +1,293 @@
1
+ from datetime import date, datetime, timedelta
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
+ def _to_timestamp(value) -> pd.Timestamp | None:
8
+ if value is None:
9
+ return None
10
+ if isinstance(value, pd.Timestamp):
11
+ return value
12
+ if isinstance(value, datetime):
13
+ return pd.Timestamp(value)
14
+ if isinstance(value, date):
15
+ return pd.Timestamp(value)
16
+ try:
17
+ v = pd.to_datetime(value, errors="coerce")
18
+ return v if pd.notna(v) else None
19
+ except Exception: # noqa: BLE001
20
+ return None
21
+
22
+
23
+ def window_bounds_period(
24
+ end_dt: pd.Timestamp,
25
+ periods: int,
26
+ step: timedelta,
27
+ ) -> tuple[pd.Timestamp, pd.Timestamp]:
28
+ start = end_dt - step * (int(periods) - 1)
29
+ return start, end_dt
30
+
31
+
32
+ def window_bounds(end_date: date, days: int) -> tuple[date, date]:
33
+ start = end_date - timedelta(days=days - 1)
34
+ return start, end_date
35
+
36
+
37
+ def is_bad(
38
+ value: float | None,
39
+ baseline: float | None,
40
+ direction: str,
41
+ rel_threshold_pct: float,
42
+ sla: float | None,
43
+ ) -> bool:
44
+ if value is None or (isinstance(value, float) and np.isnan(value)):
45
+ return False
46
+ bad = False
47
+ if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
48
+ if direction == "higher_is_better":
49
+ bad = bad or (value < float(sla))
50
+ else:
51
+ bad = bad or (value > float(sla))
52
+
53
+ if baseline is None or (isinstance(baseline, float) and np.isnan(baseline)):
54
+ return bad
55
+
56
+ thr = float(rel_threshold_pct) / 100.0
57
+ if direction == "higher_is_better":
58
+ return bad or (value < baseline - abs(baseline) * thr)
59
+ return bad or (value > baseline + abs(baseline) * thr)
60
+
61
+
62
+ def max_consecutive_periods(values: list, step: timedelta) -> int:
63
+ if not values:
64
+ return 0
65
+ ts = [_to_timestamp(v) for v in values]
66
+ ts2 = [t for t in ts if t is not None]
67
+ if not ts2:
68
+ return 0
69
+ ts_sorted = sorted(set(ts2))
70
+ streak = 1
71
+ best = 1
72
+ for prev, cur in zip(ts_sorted, ts_sorted[1:]):
73
+ if cur == prev + step:
74
+ streak += 1
75
+ else:
76
+ streak = 1
77
+ if streak > best:
78
+ best = streak
79
+ return best
80
+
81
+
82
+ def max_consecutive_days(dates: list[date]) -> int:
83
+ return max_consecutive_periods(dates, step=timedelta(days=1))
84
+
85
+
86
+ def evaluate_health_check(
87
+ daily: pd.DataFrame,
88
+ rat: str,
89
+ rules_df: pd.DataFrame,
90
+ baseline_days_n: int,
91
+ recent_days_n: int,
92
+ rel_threshold_pct: float,
93
+ min_consecutive_days: int,
94
+ granularity: str = "Daily",
95
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
96
+ if daily.empty:
97
+ return pd.DataFrame(), pd.DataFrame()
98
+
99
+ g = str(granularity or "Daily").strip().lower()
100
+ is_hourly = g.startswith("hour") or g.startswith("h")
101
+ time_col = (
102
+ "period_start"
103
+ if (is_hourly and "period_start" in daily.columns)
104
+ else "date_only"
105
+ )
106
+
107
+ step = timedelta(hours=1) if is_hourly else timedelta(days=1)
108
+ baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
109
+ recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
110
+ min_periods = (
111
+ int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
112
+ )
113
+
114
+ end_dt = _to_timestamp(pd.to_datetime(daily[time_col], errors="coerce").max())
115
+ if end_dt is None:
116
+ return pd.DataFrame(), pd.DataFrame()
117
+
118
+ recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
119
+ baseline_end_dt = recent_start_dt - step
120
+ baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
121
+
122
+ rat_rules = rules_df[rules_df["RAT"] == rat].copy()
123
+ kpis = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
124
+ rules_by_kpi = {
125
+ str(r["KPI"]): r
126
+ for r in rat_rules.to_dict(orient="records")
127
+ if str(r.get("KPI", ""))
128
+ }
129
+
130
+ rows = []
131
+
132
+ for site_code, g_site in daily.groupby("site_code"):
133
+ city = (
134
+ g_site["City"].dropna().iloc[0]
135
+ if ("City" in g_site.columns and g_site["City"].notna().any())
136
+ else None
137
+ )
138
+ g_site = g_site.sort_values(time_col)
139
+ t_all = pd.to_datetime(g_site[time_col], errors="coerce")
140
+ baseline_mask_all = (t_all >= baseline_start_dt) & (t_all <= baseline_end_dt)
141
+ recent_mask_all = (t_all >= recent_start_dt) & (t_all <= recent_end_dt)
142
+
143
+ for kpi in kpis:
144
+ rule = rules_by_kpi.get(str(kpi), {})
145
+ direction = str(rule.get("direction", "higher_is_better"))
146
+ policy = str(rule.get("policy", "enforce") or "enforce").strip().lower()
147
+ sla = rule.get("sla", np.nan)
148
+ try:
149
+ sla_val = float(sla) if pd.notna(sla) else None
150
+ except Exception:
151
+ sla_val = None
152
+
153
+ sla_eval = None if policy == "notify" else sla_val
154
+
155
+ vals = pd.to_numeric(g_site[kpi], errors="coerce")
156
+ has_any = bool(vals.notna().any())
157
+ if not has_any:
158
+ rows.append(
159
+ {
160
+ "RAT": rat,
161
+ "site_code": int(site_code),
162
+ "City": city,
163
+ "KPI": kpi,
164
+ "status": "NO_DATA",
165
+ }
166
+ )
167
+ continue
168
+
169
+ baseline_vals = vals.loc[baseline_mask_all]
170
+ recent_vals = vals.loc[recent_mask_all]
171
+ t_recent = t_all.loc[recent_vals.index]
172
+
173
+ baseline = (
174
+ baseline_vals.median(skipna=True) if baseline_mask_all.any() else np.nan
175
+ )
176
+ recent = (
177
+ recent_vals.median(skipna=True) if recent_mask_all.any() else np.nan
178
+ )
179
+
180
+ bad_dates: list = []
181
+ if recent_mask_all.any() and recent_vals.notna().any():
182
+ thr = float(rel_threshold_pct) / 100.0
183
+ b = float(baseline) if pd.notna(baseline) else None
184
+ bad_series = pd.Series(False, index=recent_vals.index)
185
+
186
+ if b is not None:
187
+ if direction == "higher_is_better":
188
+ bad_series = bad_series | (recent_vals < (b - abs(b) * thr))
189
+ else:
190
+ bad_series = bad_series | (recent_vals > (b + abs(b) * thr))
191
+
192
+ if sla_eval is not None and pd.notna(sla_eval):
193
+ if direction == "higher_is_better":
194
+ bad_series = bad_series | (recent_vals < float(sla_eval))
195
+ else:
196
+ bad_series = bad_series | (recent_vals > float(sla_eval))
197
+
198
+ bad_series = bad_series & recent_vals.notna() & t_recent.notna()
199
+ if bool(bad_series.any()):
200
+ bad_dates = t_recent.loc[bad_series].tolist()
201
+
202
+ max_streak = max_consecutive_periods(bad_dates, step=step)
203
+ persistent = max_streak >= int(min_periods)
204
+
205
+ is_bad_recent = is_bad(
206
+ float(recent) if pd.notna(recent) else None,
207
+ float(baseline) if pd.notna(baseline) else None,
208
+ direction,
209
+ rel_threshold_pct,
210
+ sla_eval,
211
+ )
212
+
213
+ is_bad_current = is_bad_recent
214
+ try:
215
+ last_mask = recent_mask_all & vals.notna() & t_all.notna()
216
+ if bool(last_mask.any()):
217
+ idx_last = t_all.loc[last_mask].idxmax()
218
+ last_val = vals.loc[idx_last]
219
+ is_bad_current = is_bad(
220
+ float(last_val) if pd.notna(last_val) else None,
221
+ float(baseline) if pd.notna(baseline) else None,
222
+ direction,
223
+ rel_threshold_pct,
224
+ sla_eval,
225
+ )
226
+ except Exception: # noqa: BLE001
227
+ pass
228
+
229
+ had_bad_recent = (len(bad_dates) > 0) or bool(is_bad_recent)
230
+
231
+ if policy == "notify":
232
+ if is_bad_current:
233
+ status = "NOTIFY"
234
+ elif had_bad_recent:
235
+ status = "NOTIFY_RESOLVED"
236
+ else:
237
+ status = "OK"
238
+ else:
239
+ if is_bad_current and persistent:
240
+ status = "PERSISTENT_DEGRADED"
241
+ elif is_bad_current:
242
+ status = "DEGRADED"
243
+ elif had_bad_recent:
244
+ status = "RESOLVED"
245
+ else:
246
+ status = "OK"
247
+
248
+ rows.append(
249
+ {
250
+ "RAT": rat,
251
+ "site_code": int(site_code),
252
+ "City": city,
253
+ "KPI": kpi,
254
+ "direction": direction,
255
+ "sla": sla_val,
256
+ "policy": policy,
257
+ "baseline_median": baseline,
258
+ "recent_median": recent,
259
+ "bad_days_recent": len(bad_dates),
260
+ "max_streak_recent": int(max_streak),
261
+ "status": status,
262
+ }
263
+ )
264
+
265
+ status_df = pd.DataFrame(rows)
266
+
267
+ summary_rows = []
268
+ for site_code, g in status_df.groupby("site_code"):
269
+ city = (
270
+ g["City"].dropna().iloc[0]
271
+ if ("City" in g.columns and g["City"].notna().any())
272
+ else None
273
+ )
274
+ degraded_cnt = int(g["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum())
275
+ persistent_cnt = int((g["status"] == "PERSISTENT_DEGRADED").sum())
276
+ resolved_cnt = int((g["status"] == "RESOLVED").sum())
277
+ summary_rows.append(
278
+ {
279
+ "RAT": rat,
280
+ "site_code": int(site_code),
281
+ "City": city,
282
+ "degraded_kpis": degraded_cnt,
283
+ "persistent_kpis": persistent_cnt,
284
+ "resolved_kpis": resolved_cnt,
285
+ }
286
+ )
287
+
288
+ summary_df = pd.DataFrame(summary_rows).sort_values(
289
+ by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
290
+ ascending=[False, False, False],
291
+ )
292
+
293
+ return status_df, summary_df
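An end-to-end sketch of `evaluate_health_check` on synthetic daily data; the rules row follows the RAT/KPI/direction/sla/policy schema consumed above, and the site/city values are illustrative:

```python
import pandas as pd
from process_kpi.kpi_health_check.engine import evaluate_health_check

daily = pd.DataFrame({
    "site_code": [101] * 10,
    "date_only": pd.date_range("2024-01-01", periods=10, freq="D"),
    "City": ["Algiers"] * 10,
    "CSSR": [99] * 7 + [95, 95, 95],  # degraded across the whole recent window
})
rules = pd.DataFrame([{"RAT": "3G", "KPI": "CSSR", "direction": "higher_is_better",
                       "sla": 98.0, "policy": "enforce"}])

status, summary = evaluate_health_check(
    daily, "3G", rules, baseline_days_n=7, recent_days_n=3,
    rel_threshold_pct=2.0, min_consecutive_days=2,
)
print(status.loc[0, "status"])  # PERSISTENT_DEGRADED (streak of 3 bad days >= 2)
```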
process_kpi/kpi_health_check/engine_v2.py ADDED
@@ -0,0 +1,320 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import timedelta
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from process_kpi.kpi_health_check.engine import window_bounds_period
9
+
10
+
11
+ def _to_datetime_series(s: pd.Series) -> pd.Series:
12
+ try:
13
+ return pd.to_datetime(s, errors="coerce")
14
+ except Exception:
15
+ return pd.to_datetime(pd.Series([], dtype="datetime64[ns]"), errors="coerce")
16
+
17
+
18
+ def _vector_is_bad(
19
+ value: pd.Series,
20
+ baseline: pd.Series,
21
+ direction: str,
22
+ rel_threshold_pct: float,
23
+ sla: float | None,
24
+ ) -> pd.Series:
25
+ v = pd.to_numeric(value, errors="coerce")
26
+ b = pd.to_numeric(baseline, errors="coerce")
27
+
28
+ bad = pd.Series(False, index=v.index)
29
+
30
+ if sla is not None and not (isinstance(sla, float) and np.isnan(sla)):
31
+ if str(direction) == "higher_is_better":
32
+ bad = bad | (v < float(sla))
33
+ else:
34
+ bad = bad | (v > float(sla))
35
+
36
+ thr = float(rel_threshold_pct) / 100.0
37
+ has_b = b.notna()
38
+ if bool(has_b.any()):
39
+ if str(direction) == "higher_is_better":
40
+ bad = bad | (v < (b - b.abs() * thr))
41
+ else:
42
+ bad = bad | (v > (b + b.abs() * thr))
43
+
44
+ bad = bad & v.notna()
45
+ return bad
46
+
47
+
48
+ def evaluate_health_check(
49
+ daily: pd.DataFrame,
50
+ rat: str,
51
+ rules_df: pd.DataFrame,
52
+ baseline_days_n: int,
53
+ recent_days_n: int,
54
+ rel_threshold_pct: float,
55
+ min_consecutive_days: int,
56
+ granularity: str = "Daily",
57
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
58
+ if daily is None or daily.empty:
59
+ return pd.DataFrame(), pd.DataFrame()
60
+
61
+ g = str(granularity or "Daily").strip().lower()
62
+ is_hourly = g.startswith("hour") or g.startswith("h")
63
+ time_col = (
64
+ "period_start"
65
+ if (is_hourly and "period_start" in daily.columns)
66
+ else "date_only"
67
+ )
68
+
69
+ step = timedelta(hours=1) if is_hourly else timedelta(days=1)
70
+ baseline_periods = int(baseline_days_n) * 24 if is_hourly else int(baseline_days_n)
71
+ recent_periods = int(recent_days_n) * 24 if is_hourly else int(recent_days_n)
72
+ min_periods = (
73
+ int(min_consecutive_days) * 24 if is_hourly else int(min_consecutive_days)
74
+ )
75
+
76
+ t_all = _to_datetime_series(daily[time_col])
77
+ end_dt = t_all.max()
78
+ if pd.isna(end_dt):
79
+ return pd.DataFrame(), pd.DataFrame()
80
+
81
+ end_dt = pd.Timestamp(end_dt)
82
+ if is_hourly:
83
+ end_dt = end_dt.floor("h")
84
+
85
+ recent_start_dt, recent_end_dt = window_bounds_period(end_dt, recent_periods, step)
86
+ baseline_end_dt = recent_start_dt - step
87
+ baseline_start_dt, _ = window_bounds_period(baseline_end_dt, baseline_periods, step)
88
+
89
+ rat_rules = (
90
+ rules_df[rules_df["RAT"] == rat].copy()
91
+ if isinstance(rules_df, pd.DataFrame)
92
+ else pd.DataFrame()
93
+ )
94
+ if rat_rules.empty or "KPI" not in rat_rules.columns:
95
+ return pd.DataFrame(), pd.DataFrame()
96
+
97
+ kpi_cols = [k for k in rat_rules["KPI"].tolist() if k in daily.columns]
98
+ if not kpi_cols:
99
+ return pd.DataFrame(), pd.DataFrame()
100
+
101
+ base_cols = ["site_code", time_col]
102
+ if "City" in daily.columns:
103
+ base_cols.append("City")
104
+ base = daily[base_cols + kpi_cols].copy()
105
+ base["site_code"] = pd.to_numeric(base["site_code"], errors="coerce")
106
+ base = base.dropna(subset=["site_code"]).copy()
107
+ base["site_code"] = base["site_code"].astype(int)
108
+
109
+ base_t = _to_datetime_series(base[time_col])
110
+ base["_t"] = base_t
111
+ base = base.dropna(subset=["_t"]).copy()
112
+
113
+ baseline_mask = (base["_t"] >= pd.to_datetime(baseline_start_dt)) & (
114
+ base["_t"] <= pd.to_datetime(baseline_end_dt)
115
+ )
116
+ recent_mask = (base["_t"] >= pd.to_datetime(recent_start_dt)) & (
117
+ base["_t"] <= pd.to_datetime(recent_end_dt)
118
+ )
119
+
120
+ counts = base.groupby("site_code")[kpi_cols].count()
121
+ all_sites = counts.index
122
+
123
+ if "City" in base.columns:
124
+ city_map = (
125
+ base[["site_code", "City"]]
126
+ .dropna(subset=["City"])
127
+ .drop_duplicates("site_code")
128
+ .set_index("site_code")["City"]
129
+ )
130
+ city = city_map.reindex(all_sites)
131
+ else:
132
+ city = pd.Series([None] * len(all_sites), index=all_sites)
133
+
134
+ baseline_subset = base.loc[baseline_mask, ["site_code"] + kpi_cols]
135
+ recent_subset = base.loc[recent_mask, ["site_code", "_t"] + kpi_cols]
136
+
137
+ baseline_medians = (
138
+ baseline_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
139
+ if not baseline_subset.empty
140
+ else pd.DataFrame(index=all_sites)
141
+ )
142
+ recent_medians = (
143
+ recent_subset.groupby("site_code")[kpi_cols].median(numeric_only=True)
144
+ if not recent_subset.empty
145
+ else pd.DataFrame(index=all_sites)
146
+ )
147
+
148
+ recent_sorted = (
149
+ recent_subset.sort_values(["site_code", "_t"])
150
+ if not recent_subset.empty
151
+ else recent_subset
152
+ )
153
+ gap = recent_sorted.groupby("site_code")["_t"].diff()
154
+ gap_ok = (gap == step).fillna(False)
155
+
156
+ out_frames: list[pd.DataFrame] = []
157
+
158
+ for _, rr in rat_rules.iterrows():
159
+ kpi = str(rr.get("KPI"))
160
+ if not kpi or kpi not in kpi_cols:
161
+ continue
162
+
163
+ direction = str(rr.get("direction", "higher_is_better"))
164
+ policy = str(rr.get("policy", "enforce") or "enforce").strip().lower()
165
+ sla_raw = rr.get("sla", np.nan)
166
+ try:
167
+ sla_val = float(sla_raw) if pd.notna(sla_raw) else None
168
+ except Exception:
169
+ sla_val = None
170
+
171
+ sla_eval = None if policy == "notify" else sla_val
172
+
173
+ cnt = counts[kpi].reindex(all_sites).fillna(0).astype(int)
174
+ has_any = cnt > 0
175
+
176
+ baseline = (
177
+ baseline_medians[kpi].reindex(all_sites)
178
+ if kpi in baseline_medians.columns
179
+ else pd.Series([np.nan] * len(all_sites), index=all_sites)
180
+ )
181
+ recent = (
182
+ recent_medians[kpi].reindex(all_sites)
183
+ if kpi in recent_medians.columns
184
+ else pd.Series([np.nan] * len(all_sites), index=all_sites)
185
+ )
186
+
187
+ if not recent_sorted.empty and kpi in recent_sorted.columns:
188
+ v_recent = pd.to_numeric(recent_sorted[kpi], errors="coerce")
189
+ b_row = recent_sorted["site_code"].map(
190
+ pd.to_numeric(
191
+ baseline_medians.get(kpi, pd.Series(dtype=float)), errors="coerce"
192
+ )
193
+ )
194
+ bad_row = _vector_is_bad(
195
+ v_recent, b_row, direction, float(rel_threshold_pct), sla_eval
196
+ )
197
+ bad_row = bad_row & recent_sorted["_t"].notna()
198
+
199
+ start = (~gap_ok) | (~bad_row)  # a run breaks on a time gap or a non-bad period
200
+ run_id = start.groupby(recent_sorted["site_code"]).cumsum()
201
+
202
+ bad_counts = (
203
+ bad_row.groupby(recent_sorted["site_code"])
204
+ .sum()
205
+ .reindex(all_sites)
206
+ .fillna(0)
207
+ .astype(int)
208
+ )
209
+ streaks = (
210
+ bad_row.groupby([recent_sorted["site_code"], run_id])
211
+ .sum()
212
+ .groupby(level=0)
213
+ .max()
214
+ .reindex(all_sites)
215
+ .fillna(0)
216
+ .astype(int)
217
+ )
218
+
219
+ tmp_last = (
220
+ recent_sorted[["site_code", "_t", kpi]]
221
+ .dropna(subset=[kpi])
222
+ .sort_values(["site_code", "_t"])
223
+ )
224
+ if not tmp_last.empty:
225
+ last_vals = tmp_last.groupby("site_code")[kpi].tail(1)
226
+ last_map = pd.Series(
227
+ last_vals.values,
228
+ index=tmp_last.groupby("site_code")
229
+ .tail(1)["site_code"]
230
+ .astype(int)
231
+ .values,
232
+ )
233
+ last = last_map.reindex(all_sites)
234
+ else:
235
+ last = pd.Series([np.nan] * len(all_sites), index=all_sites)
236
+ else:
237
+ bad_counts = pd.Series([0] * len(all_sites), index=all_sites)
238
+ streaks = pd.Series([0] * len(all_sites), index=all_sites)
239
+ last = pd.Series([np.nan] * len(all_sites), index=all_sites)
240
+
241
+ is_bad_recent = _vector_is_bad(
242
+ recent, baseline, direction, float(rel_threshold_pct), sla_eval
243
+ )
244
+ is_bad_current = _vector_is_bad(
245
+ last, baseline, direction, float(rel_threshold_pct), sla_eval
246
+ )
247
+ had_bad_recent = (bad_counts > 0) | is_bad_recent
248
+
249
+ persistent = streaks >= int(min_periods)
250
+
251
+ status = pd.Series("OK", index=all_sites)
252
+ status = status.where(has_any, "NO_DATA")
253
+
254
+ if policy == "notify":
256
+ status = status.where(~(has_any & is_bad_current), "NOTIFY")
257
+ status = status.where(
258
+ ~(has_any & (~is_bad_current) & had_bad_recent), "NOTIFY_RESOLVED"
259
+ )
260
+ else:
261
+ status = status.where(
262
+ ~(has_any & is_bad_current & persistent), "PERSISTENT_DEGRADED"
263
+ )
264
+ status = status.where(
265
+ ~(has_any & is_bad_current & (~persistent)), "DEGRADED"
266
+ )
267
+ status = status.where(
268
+ ~(has_any & (~is_bad_current) & had_bad_recent), "RESOLVED"
269
+ )
270
+
271
+ frame = pd.DataFrame(
272
+ {
273
+ "RAT": rat,
274
+ "site_code": all_sites.astype(int),
275
+ "City": city.values,
276
+ "KPI": kpi,
277
+ "direction": direction,
278
+ "sla": sla_val,
279
+ "policy": policy,
280
+ "baseline_median": baseline.values,
281
+ "recent_median": recent.values,
282
+ "bad_days_recent": bad_counts.values,
283
+ "max_streak_recent": streaks.values,
284
+ "status": status.values,
285
+ }
286
+ )
287
+ out_frames.append(frame)
288
+
289
+ if not out_frames:
290
+ return pd.DataFrame(), pd.DataFrame()
291
+
292
+ # Filter out empty frames to avoid FutureWarning about empty/all-NA entries
293
+ non_empty_frames = [f for f in out_frames if not f.empty and not f.isna().all().all()]
294
+ if not non_empty_frames:
295
+ return pd.DataFrame(), pd.DataFrame()
296
+
297
+ status_df = pd.concat(non_empty_frames, ignore_index=True)
298
+
299
+ summary = (
300
+ status_df.groupby("site_code", as_index=False)
301
+ .agg(
302
+ RAT=("RAT", "first"),
303
+ City=("City", "first"),
304
+ degraded_kpis=(
305
+ "status",
306
+ lambda s: int(s.isin(["DEGRADED", "PERSISTENT_DEGRADED"]).sum()),
307
+ ),
308
+ persistent_kpis=(
309
+ "status",
310
+ lambda s: int((s == "PERSISTENT_DEGRADED").sum()),
311
+ ),
312
+ resolved_kpis=("status", lambda s: int((s == "RESOLVED").sum())),
313
+ )
314
+ .sort_values(
315
+ by=["degraded_kpis", "persistent_kpis", "resolved_kpis"],
316
+ ascending=[False, False, False],
317
+ )
318
+ )
319
+
320
+ return status_df, summary
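The vectorised streak logic above replaces the per-site Python loop from `engine.py`: a new run id opens whenever a period is non-bad or non-contiguous, so summing `bad_row` within each (site, run) yields streak lengths. A toy illustration of the trick, assuming an hourly step:

```python
from datetime import timedelta

import pandas as pd

t = pd.Series(pd.to_datetime(["2024-01-01 00:00", "2024-01-01 01:00",
                              "2024-01-01 02:00", "2024-01-01 04:00"]))
site = pd.Series([1, 1, 1, 1])
bad = pd.Series([True, True, True, True])

gap_ok = (t.diff() == timedelta(hours=1)).fillna(False)
start = (~gap_ok) | (~bad)                  # each True opens a new run
run_id = start.groupby(site).cumsum()
streaks = bad.groupby([site, run_id]).sum().groupby(level=0).max()
print(int(streaks.loc[1]))  # 3 -> the missing 03:00 sample breaks the streak
```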
process_kpi/kpi_health_check/export.py ADDED
@@ -0,0 +1,264 @@
1
+ import pandas as pd
2
+
3
+ from panel_app.convert_to_excel_panel import write_dfs_to_excel
4
+
5
+
6
+ def _normalize_time_key(
7
+ df: pd.DataFrame, granularity: str
8
+ ) -> tuple[str, pd.Series] | None:
9
+ if df is None or df.empty:
10
+ return None
11
+ g = str(granularity or "Daily").strip().lower()
12
+ is_hourly = g.startswith("hour") or g.startswith("h")
13
+ if is_hourly:
14
+ time_col = "period_start" if "period_start" in df.columns else "date_only"
15
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.floor("h")
16
+ return time_col, t
17
+
18
+ time_col = "date_only" if "date_only" in df.columns else "period_start"
19
+ t = pd.to_datetime(df.get(time_col), errors="coerce").dt.date
20
+ return time_col, t
21
+
22
+
23
+ def _build_all_tech_sheet(
24
+ daily_by_rat: dict[str, pd.DataFrame],
25
+ granularity: str,
26
+ ) -> tuple[str, pd.DataFrame] | None:
27
+ if not daily_by_rat or not isinstance(daily_by_rat, dict):
28
+ return None
29
+
30
+ g = str(granularity or "Daily").strip().lower()
31
+ prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
32
+
33
+ ordered_rats = ["2G", "3G", "LTE", "TWAMP"]
34
+ present = [r for r in ordered_rats if r in daily_by_rat]
35
+ if not present:
36
+ present = [str(r) for r in daily_by_rat.keys()]
37
+
38
+ time_col = None
39
+ keys = []
40
+ coords_parts = []
41
+
42
+ for rat in present:
43
+ df = daily_by_rat.get(rat)
44
+ if not isinstance(df, pd.DataFrame) or df.empty:
45
+ continue
46
+
47
+ nt = _normalize_time_key(df, granularity)
48
+ if nt is None:
49
+ continue
50
+ tc, tkey = nt
51
+ if time_col is None:
52
+ time_col = tc
53
+
54
+ tmp = pd.DataFrame(
55
+ {"site_code": pd.to_numeric(df.get("site_code"), errors="coerce"), tc: tkey}
56
+ )
57
+ tmp = tmp.dropna(subset=["site_code", tc]).copy()
58
+ tmp["site_code"] = tmp["site_code"].astype(int)
59
+ keys.append(tmp[["site_code", tc]])
60
+
61
+ cols = [
62
+ c for c in ["site_code", "City", "Longitude", "Latitude"] if c in df.columns
63
+ ]
64
+ if cols:
65
+ cp = df[cols].copy()
66
+ cp["site_code"] = pd.to_numeric(cp["site_code"], errors="coerce")
67
+ cp = cp.dropna(subset=["site_code"]).copy()
68
+ cp["site_code"] = cp["site_code"].astype(int)
69
+ coords_parts.append(cp)
70
+
71
+ if not keys or time_col is None:
72
+ return None
73
+
74
+ base = pd.concat(keys, ignore_index=True).drop_duplicates(
75
+ subset=["site_code", time_col]
76
+ )
77
+
78
+ coords = None
79
+ if coords_parts:
80
+ coords_all = pd.concat(coords_parts, ignore_index=True)
81
+ coords_all = coords_all.drop_duplicates(subset=["site_code"])
82
+ keep = [
83
+ c
84
+ for c in ["site_code", "City", "Longitude", "Latitude"]
85
+ if c in coords_all.columns
86
+ ]
87
+ coords = coords_all[keep].copy() if keep else None
88
+
89
+ if isinstance(coords, pd.DataFrame) and not coords.empty:
90
+ base = pd.merge(base, coords, on="site_code", how="left")
91
+
92
+ base["ID"] = base[time_col].astype(str) + "_" + base["site_code"].astype(str)
93
+
94
+ meta_cols = {
95
+ "site_code",
96
+ "period_start",
97
+ "date_only",
98
+ "Longitude",
99
+ "Latitude",
100
+ "City",
101
+ "RAT",
102
+ "ID",
103
+ }
104
+
105
+ out = base
106
+ for rat in present:
107
+ df = daily_by_rat.get(rat)
108
+ if not isinstance(df, pd.DataFrame) or df.empty:
109
+ continue
110
+
111
+ nt = _normalize_time_key(df, granularity)
112
+ if nt is None:
113
+ continue
114
+ tc, tkey = nt
115
+
116
+ tmp = df.copy()
117
+ tmp["site_code"] = pd.to_numeric(tmp.get("site_code"), errors="coerce")
118
+ tmp = tmp.dropna(subset=["site_code"]).copy()
119
+ tmp["site_code"] = tmp["site_code"].astype(int)
120
+ tmp[tc] = tkey
121
+ tmp = tmp.dropna(subset=[tc]).copy()
122
+
123
+ kpi_cols = [c for c in tmp.columns if c not in meta_cols]
124
+ keep_cols = ["site_code", tc] + kpi_cols
125
+ tmp2 = tmp[keep_cols].copy()
126
+ rename = {c: f"{rat}_{c}" for c in kpi_cols}
127
+ tmp2 = tmp2.rename(columns=rename)
128
+ out = pd.merge(
129
+ out,
130
+ tmp2,
131
+ left_on=["site_code", time_col],
132
+ right_on=["site_code", tc],
133
+ how="left",
134
+ )
135
+ if tc != time_col and tc in out.columns:
136
+ out = out.drop(columns=[tc], errors="ignore")
137
+
138
+ first_cols = [
139
+ c
140
+ for c in ["ID", time_col, "site_code", "City", "Longitude", "Latitude"]
141
+ if c in out.columns
142
+ ]
143
+ rest = [c for c in out.columns if c not in first_cols]
144
+ out = out[first_cols + rest]
145
+ try:
146
+ out = out.sort_values(by=[time_col, "site_code"], ascending=[True, True])
147
+ except Exception:
148
+ pass
149
+
150
+ return f"{prefix}_All", out
151
+
152
+
153
+ def build_export_bytes(
154
+ datasets_df: pd.DataFrame | None,
155
+ rules_df: pd.DataFrame | None,
156
+ summary_df: pd.DataFrame | None,
157
+ status_df: pd.DataFrame | None,
158
+ daily_by_rat: dict[str, pd.DataFrame] | None = None,
159
+ granularity: str = "Daily",
160
+ multirat_summary_df: pd.DataFrame | None = None,
161
+ top_anomalies_df: pd.DataFrame | None = None,
162
+ complaint_multirat_df: pd.DataFrame | None = None,
163
+ complaint_top_anomalies_df: pd.DataFrame | None = None,
164
+ ops_queue_df: pd.DataFrame | None = None,
165
+ delta_df: pd.DataFrame | None = None,
166
+ profile: dict | None = None,
167
+ ) -> bytes:
168
+ if profile is not None:
169
+ profile["export_prep_seconds"] = 0.0
170
+ profile["excel_total_seconds"] = 0.0
171
+
172
+ t_prep0 = pd.Timestamp.utcnow() if profile is not None else None
173
+ dfs = [
174
+ datasets_df if isinstance(datasets_df, pd.DataFrame) else pd.DataFrame(),
175
+ rules_df if isinstance(rules_df, pd.DataFrame) else pd.DataFrame(),
176
+ summary_df if isinstance(summary_df, pd.DataFrame) else pd.DataFrame(),
177
+ status_df if isinstance(status_df, pd.DataFrame) else pd.DataFrame(),
178
+ ]
179
+
180
+ sheet_names = [
181
+ "Datasets",
182
+ "KPI_Rules",
183
+ "Site_Summary",
184
+ "Site_KPI_Status",
185
+ ]
186
+
187
+ max_data_rows = 1048575
188
+ if daily_by_rat and isinstance(daily_by_rat, dict):
189
+ g = str(granularity or "Daily").strip().lower()
190
+ prefix = "Hourly" if (g.startswith("hour") or g.startswith("h")) else "Daily"
191
+
192
+ combined = _build_all_tech_sheet(daily_by_rat, granularity)
193
+ if combined is not None:
194
+ base, df_all = combined
195
+ if len(df_all) <= max_data_rows:
196
+ dfs.append(df_all)
197
+ sheet_names.append(base[:31])
198
+ else:
199
+ part = 1
200
+ for start in range(0, len(df_all), max_data_rows):
201
+ end = min(start + max_data_rows, len(df_all))
202
+ dfs.append(df_all.iloc[start:end].copy())
203
+ sheet_names.append(f"{base}_p{part}"[:31])
204
+ part += 1
205
+ else:
206
+ for rat, df in daily_by_rat.items():
207
+ if not isinstance(df, pd.DataFrame):
208
+ continue
209
+ base = f"{prefix}_All_{str(rat)}"
210
+ if len(df) <= max_data_rows:
211
+ dfs.append(df)
212
+ sheet_names.append(base[:31])
213
+ else:
214
+ part = 1
215
+ for start in range(0, len(df), max_data_rows):
216
+ end = min(start + max_data_rows, len(df))
217
+ dfs.append(df.iloc[start:end].copy())
218
+ sheet_names.append(f"{base}_p{part}"[:31])
219
+ part += 1
220
+
221
+ dfs.extend(
222
+ [
223
+ (
224
+ multirat_summary_df
225
+ if isinstance(multirat_summary_df, pd.DataFrame)
226
+ else pd.DataFrame()
227
+ ),
228
+ (
229
+ top_anomalies_df
230
+ if isinstance(top_anomalies_df, pd.DataFrame)
231
+ else pd.DataFrame()
232
+ ),
233
+ (
234
+ complaint_multirat_df
235
+ if isinstance(complaint_multirat_df, pd.DataFrame)
236
+ else pd.DataFrame()
237
+ ),
238
+ (
239
+ complaint_top_anomalies_df
240
+ if isinstance(complaint_top_anomalies_df, pd.DataFrame)
241
+ else pd.DataFrame()
242
+ ),
243
+ ops_queue_df if isinstance(ops_queue_df, pd.DataFrame) else pd.DataFrame(),
244
+ delta_df if isinstance(delta_df, pd.DataFrame) else pd.DataFrame(),
245
+ ]
246
+ )
247
+ sheet_names.extend(
248
+ [
249
+ "MultiRAT_Summary",
250
+ "Top_Anomalies",
251
+ "Complaint_MultiRAT",
252
+ "Complaint_Top_Anomalies",
253
+ "Ops_Queue",
254
+ "Delta",
255
+ ]
256
+ )
257
+
258
+ if profile is not None:
259
+ t_prep1 = pd.Timestamp.utcnow()
260
+ if t_prep0 is not None:
261
+ profile["export_prep_seconds"] = float((t_prep1 - t_prep0).total_seconds())
262
+ profile["sheet_count"] = int(len(sheet_names))
263
+
264
+ return write_dfs_to_excel(dfs, sheet_names, index=False, profile=profile)
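Excel caps a worksheet at 1,048,576 rows, hence the `max_data_rows` paging above. The same chunking as a standalone sketch (function name is illustrative):

```python
import pandas as pd

def split_for_excel(df: pd.DataFrame, base: str, max_rows: int = 1_048_575):
    """Yield (sheet_name, frame) pairs, paging once df exceeds the Excel row limit."""
    if len(df) <= max_rows:
        yield base[:31], df  # Excel also caps sheet names at 31 characters
        return
    for part, start in enumerate(range(0, len(df), max_rows), start=1):
        yield f"{base}_p{part}"[:31], df.iloc[start:start + max_rows]

frame = pd.DataFrame({"x": range(5)})
print([name for name, _ in split_for_excel(frame, "Daily_All", max_rows=2)])
# ['Daily_All_p1', 'Daily_All_p2', 'Daily_All_p3']
```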
process_kpi/kpi_health_check/io.py ADDED
@@ -0,0 +1,45 @@
1
+ import io
2
+ import zipfile
3
+
4
+ import pandas as pd
5
+
6
+
7
+ def read_bytes_to_df(file_bytes: bytes, filename: str) -> pd.DataFrame:
8
+ if not file_bytes:
9
+ raise ValueError("Empty file")
10
+
11
+ filename_l = (filename or "").lower()
12
+ data = io.BytesIO(file_bytes)
13
+
14
+ if filename_l.endswith(".zip"):
15
+ with zipfile.ZipFile(data) as z:
16
+ csv_files = [f for f in z.namelist() if f.lower().endswith(".csv")]
17
+ if not csv_files:
18
+ raise ValueError("No CSV file found in the ZIP archive")
19
+ dfs = []
20
+ for csv_name in csv_files:
21
+ try:
22
+ with z.open(csv_name) as f:
23
+ df = pd.read_csv(
24
+ f,
25
+ encoding="latin1",
26
+ sep=";",
27
+ low_memory=False,
28
+ )
29
+ if isinstance(df, pd.DataFrame) and not df.empty:
30
+ dfs.append(df)
31
+ except Exception:
32
+ continue
33
+
34
+ if not dfs:
35
+ raise ValueError("No readable CSV content found in the ZIP archive")
36
+
37
+ if len(dfs) == 1:
38
+ return dfs[0]
39
+
40
+ return pd.concat(dfs, ignore_index=True, sort=False)
41
+
42
+ if filename_l.endswith(".csv"):
43
+ return pd.read_csv(data, encoding="latin1", sep=";", low_memory=False)
44
+
45
+ raise ValueError("Unsupported file format. Please upload a ZIP or CSV file.")
process_kpi/kpi_health_check/kpi_groups.py ADDED
@@ -0,0 +1,96 @@
1
+ import re
2
+
3
+ import pandas as pd
4
+
5
+ # Regex patterns for KPI classification
6
+ # Order matters: first match wins
7
+ PATTERNS = {
8
+ # Refined patterns based on user data
9
+ "Transmission": r"(?i)(abis|a-?bis|iub|x2|backhaul|transport|transmission|s1(?:\s|_)*sig(?:\s|_)*conn(?:\s|_)*sr)",
10
+ "Interference": r"(?i)(\brtwp\b|avg(?:\s|_)*rtwp|rtwp(?:\s|_)*rx(?:\s|_)*ant)",
11
+ "Mobility": r"(?i)(handover|(?<![A-Za-z0-9])ho(?![A-Za-z0-9])|soft(?:\s|_|-)*ho|intra(?:\s|_|-)*freq(?:\s|_|-)*ho|inter(?:\s|_|-)*freq(?:\s|_|-)*ho|csfb)",
12
+ "Success Rate": r"(?i)(cssr|success|attach|setup|establ|answer|complete|connected|ho.*succ|\berab\b|\brrc\b.*(?:\bsr\b|rate|succ)|\basr\b|\bsr\b)",
13
+ "Fails/Drop/Block": r"(?i)(drop|dcr|fail|block|reject|deny|loss|lost|discard|congestion|accessibility.*fail|retention.*fail)",
14
+ "Throughput": r"(?i)(throughput|thp|thrput|PDCP|debit|dl.*rate|ul.*rate|bitrate)",
15
+ "Traffic": r"(?i)(traffic|volume|erl|payload|gbytes|gb|load|usage|utilization)",
16
+ "Availability": r"(?i)(availability|avail|unavailability|unavail|dispo|disponibil|uptime)",
17
+ "Latency": r"(?i)(latency|delay|\brt\b|rtt)",
18
+ }
19
+
20
+
21
+ def classify_kpi(kpi_name: str) -> str:
22
+ """
23
+ Classifies a KPI name into a group based on regex patterns.
24
+ Returns 'Other' if no match found.
25
+ """
26
+ kpi_str = str(kpi_name)
27
+ for group, pattern in PATTERNS.items():
28
+ if re.search(pattern, kpi_str):
29
+ return group
30
+ return "Other"
31
+
32
+
33
+ def get_kpis_by_group(all_kpis: list[str]) -> dict[str, list[str]]:
34
+ """
35
+ Returns a dictionary mapping group names to lists of KPIs.
36
+ """
37
+ groups = {g: [] for g in PATTERNS.keys()}
38
+ groups["Other"] = []
39
+
40
+ for kpi in sorted(all_kpis):
41
+ group = classify_kpi(kpi)
42
+ groups[group].append(kpi)
43
+
44
+ # Remove empty groups
45
+ return {k: v for k, v in groups.items() if v}
46
+
47
+
48
+ def filter_kpis(
49
+ all_kpis: list[str],
50
+ group: str,
51
+ mode: str = "Filter",
52
+ top_n: int = 12,
53
+ stats_df: pd.DataFrame | None = None,
54
+ ) -> list[str]:
55
+ """
56
+ Filters KPIs based on the selected group and mode.
57
+
58
+ Args:
59
+ all_kpis: List of available KPI names.
60
+ group: Selected group name (or 'All').
61
+ mode: 'Filter' or 'Top-N'.
62
+ top_n: Max KPIs to return if filtering needs truncation or specific selection.
63
+ stats_df: Optional DataFrame with 'site_code', 'KPI', 'is_bad', etc. for sorting.
64
+ """
65
+ if not all_kpis:
66
+ return []
67
+
68
+ # 1. Filter by group
69
+ if group and group != "All (selected KPIs)":
70
+ # Handle "Success Rate (>= SLA...)" formatted names if passed from UI
72
+ # Basic mapping check - if the group name in UI has extra text, we match key prefix
73
+ target_group = "Other"
74
+ for k in PATTERNS.keys():
75
+ if k in group:
76
+ target_group = k
77
+ break
78
+ if "Other" in group:
79
+ target_group = "Other"
80
+
81
+ candidates = [k for k in all_kpis if classify_kpi(k) == target_group]
82
+ else:
83
+ candidates = list(all_kpis)
84
+
85
+ if not candidates:
86
+ return []
87
+
88
+ # 2. Sort/Limit if needed
89
+ # If we have stats, we can sort by "badness" or variance
90
+ # For now, simplistic alpha sort unless we have stats
91
+ if stats_df is not None and not stats_df.empty:
92
+ # TODO: Implement smart sorting based on stats if available
93
+ # For V1, we just return candidates sorted alphabetically
94
+ pass
95
+
96
+ return sorted(candidates)
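Example of the first-match-wins classification (KPI names are illustrative):

```python
from process_kpi.kpi_health_check.kpi_groups import classify_kpi

for kpi in ["Avg RTWP RX Ant", "TCH drop rate", "DL PDCP throughput", "Cell Availability"]:
    print(kpi, "->", classify_kpi(kpi))
# Avg RTWP RX Ant -> Interference
# TCH drop rate -> Fails/Drop/Block
# DL PDCP throughput -> Throughput
# Cell Availability -> Availability
```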
process_kpi/kpi_health_check/multi_rat.py ADDED
@@ -0,0 +1,253 @@
1
+ import pandas as pd
2
+
3
+ from process_kpi.kpi_health_check.kpi_groups import classify_kpi
4
+
5
+
6
+ def _slug(value: str) -> str:
7
+ s = str(value or "").strip().lower()
8
+ out = []
9
+ prev_underscore = False
10
+ for ch in s:
11
+ if ch.isalnum():
12
+ out.append(ch)
13
+ prev_underscore = False
14
+ else:
15
+ if not prev_underscore:
16
+ out.append("_")
17
+ prev_underscore = True
18
+ return "".join(out).strip("_")
19
+
20
+
21
+ def _fmt_num(value) -> str:
22
+ try:
23
+ v = pd.to_numeric(value, errors="coerce")
24
+ if pd.isna(v):
25
+ return "NA"
26
+ return f"{float(v):.3g}"
27
+ except Exception: # noqa: BLE001
28
+ return "NA"
29
+
30
+
31
+ def _build_rca_tags(row: dict) -> str:
32
+ tags: list[str] = []
33
+ group = str(row.get("rca_group") or "Other")
34
+ tags.append(_slug(group) if group else "other")
35
+
36
+ status = str(row.get("status") or "").strip().upper()
37
+ if status == "PERSISTENT_DEGRADED":
38
+ tags.append("persistent")
39
+ elif status == "DEGRADED":
40
+ tags.append("degraded")
41
+ elif status:
42
+ tags.append(_slug(status))
43
+
44
+ baseline = row.get("baseline_median")
45
+ recent = row.get("recent_median")
46
+ if pd.isna(pd.to_numeric(baseline, errors="coerce")):
47
+ tags.append("missing_baseline")
48
+ if pd.isna(pd.to_numeric(recent, errors="coerce")):
49
+ tags.append("missing_recent")
50
+
51
+ impact = pd.to_numeric(row.get("impacted_rats"), errors="coerce")
52
+ if pd.notna(impact) and float(impact) >= 2:
53
+ tags.append("multi_rat")
54
+
55
+ return ",".join([t for t in tags if t])
56
+
57
+
58
+ def _build_rca_hint(row: dict) -> str:
59
+ group = str(row.get("rca_group") or "Other")
60
+ kpi = str(row.get("KPI") or "")
61
+ rat = str(row.get("RAT") or "")
62
+ status = str(row.get("status") or "")
63
+ baseline_s = _fmt_num(row.get("baseline_median"))
64
+ recent_s = _fmt_num(row.get("recent_median"))
65
+ streak = int(pd.to_numeric(row.get("max_streak_recent"), errors="coerce") or 0)
66
+ bad = int(pd.to_numeric(row.get("bad_days_recent"), errors="coerce") or 0)
67
+ return (
68
+ f"{group} | {rat} | {kpi} | {status} | "
69
+ f"baseline={baseline_s} recent={recent_s} | streak={streak}d bad={bad}d"
70
+ )
71
+
72
+
73
+ def compute_multirat_views(
74
+ status_df: pd.DataFrame,
75
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
76
+ if status_df is None or status_df.empty:
77
+ return pd.DataFrame(), pd.DataFrame()
78
+
79
+ df = status_df.copy()
80
+ df["is_degraded"] = df["status"].isin(["DEGRADED", "PERSISTENT_DEGRADED"])
81
+ df["is_persistent"] = df["status"].isin(["PERSISTENT_DEGRADED"])
82
+ df["is_resolved"] = df["status"].isin(["RESOLVED"])
83
+
84
+ def _first_city(s: pd.Series):
85
+ s2 = s.dropna()
86
+ return s2.iloc[0] if not s2.empty else None
87
+
88
+ base = (
89
+ df.groupby("site_code", as_index=False)
90
+ .agg(
91
+ City=("City", _first_city),
92
+ degraded_kpis_total=("is_degraded", "sum"),
93
+ persistent_kpis_total=("is_persistent", "sum"),
94
+ resolved_kpis_total=("is_resolved", "sum"),
95
+ )
96
+ .copy()
97
+ )
98
+
99
+ impacted = (
100
+ df[df["is_degraded"]]
101
+ .groupby("site_code")["RAT"]
102
+ .nunique()
103
+ .rename("impacted_rats")
104
+ .reset_index()
105
+ )
106
+
107
+ resolved_pivot = (
108
+ df[df["is_resolved"]]
109
+ .pivot_table(
110
+ index="site_code",
111
+ columns="RAT",
112
+ values="KPI",
113
+ aggfunc="count",
114
+ fill_value=0,
115
+ )
116
+ .rename(columns=lambda c: f"resolved_{c}")
117
+ .reset_index()
118
+ )
119
+
120
+ base = pd.merge(base, impacted, on="site_code", how="left")
121
+ base["impacted_rats"] = base["impacted_rats"].fillna(0).astype(int)
122
+
123
+ degraded_pivot = (
124
+ df[df["is_degraded"]]
125
+ .pivot_table(
126
+ index="site_code",
127
+ columns="RAT",
128
+ values="KPI",
129
+ aggfunc="count",
130
+ fill_value=0,
131
+ )
132
+ .rename(columns=lambda c: f"degraded_{c}")
133
+ .reset_index()
134
+ )
135
+
136
+ persistent_pivot = (
137
+ df[df["is_persistent"]]
138
+ .pivot_table(
139
+ index="site_code",
140
+ columns="RAT",
141
+ values="KPI",
142
+ aggfunc="count",
143
+ fill_value=0,
144
+ )
145
+ .rename(columns=lambda c: f"persistent_{c}")
146
+ .reset_index()
147
+ )
148
+
149
+ out = base
150
+ if not degraded_pivot.empty:
151
+ out = pd.merge(out, degraded_pivot, on="site_code", how="left")
152
+ if not persistent_pivot.empty:
153
+ out = pd.merge(out, persistent_pivot, on="site_code", how="left")
154
+ if not resolved_pivot.empty:
155
+ out = pd.merge(out, resolved_pivot, on="site_code", how="left")
156
+
157
+ metric_cols = [c for c in out.columns if c != "City"]
158
+ out[metric_cols] = out[metric_cols].fillna(0)
159
+
160
+ resolved_total = (
161
+ out["resolved_kpis_total"].astype(float)
162
+ if "resolved_kpis_total" in out.columns
163
+ else 0.0
164
+ )
165
+ out["criticality_score"] = (
166
+ (
167
+ out["persistent_kpis_total"].astype(float) * 5.0
168
+ + out["degraded_kpis_total"].astype(float) * 2.0
169
+ + out["impacted_rats"].astype(float) * 1.0
170
+ + resolved_total * 0.5
171
+ )
172
+ .round(0)
173
+ .astype(int)
174
+ )
175
+
176
+ out = out.sort_values(
177
+ by=[
178
+ "criticality_score",
179
+ "persistent_kpis_total",
180
+ "degraded_kpis_total",
181
+ "impacted_rats",
182
+ ],
183
+ ascending=[False, False, False, False],
184
+ )
185
+
186
+ top = df[df["is_degraded"]].copy()
187
+ sev = {"PERSISTENT_DEGRADED": 2, "DEGRADED": 1}
188
+ top["severity"] = top["status"].map(sev).fillna(0).astype(int)
189
+
190
+ for col in ["bad_days_recent", "max_streak_recent"]:
191
+ if col not in top.columns:
192
+ top[col] = pd.NA
193
+
194
+ top["anomaly_score"] = (
195
+ (
196
+ top["severity"].astype(float) * 100.0
197
+ + pd.to_numeric(top["max_streak_recent"], errors="coerce")
198
+ .fillna(0)
199
+ .astype(float)
200
+ * 10.0
201
+ + pd.to_numeric(top["bad_days_recent"], errors="coerce")
202
+ .fillna(0)
203
+ .astype(float)
204
+ )
205
+ .round(0)
206
+ .astype(int)
207
+ )
208
+
209
+ top = top.sort_values(
210
+ by=["anomaly_score", "severity", "max_streak_recent", "bad_days_recent"],
211
+ ascending=[False, False, False, False],
212
+ )
213
+
214
+ try:
215
+ top = pd.merge(top, impacted, on="site_code", how="left")
216
+ top["impacted_rats"] = (
217
+ pd.to_numeric(top["impacted_rats"], errors="coerce").fillna(0).astype(int)
218
+ )
219
+ except Exception: # noqa: BLE001
220
+ top["impacted_rats"] = 0
221
+
222
+ top["rca_group"] = top["KPI"].apply(classify_kpi)
223
+ try:
224
+ top["rca_hint"] = top.apply(lambda r: _build_rca_hint(r.to_dict()), axis=1)
225
+ top["rca_tags"] = top.apply(lambda r: _build_rca_tags(r.to_dict()), axis=1)
226
+ except Exception: # noqa: BLE001
227
+ top["rca_hint"] = ""
228
+ top["rca_tags"] = ""
229
+
230
+ top_cols = [
231
+ c
232
+ for c in [
233
+ "anomaly_score",
234
+ "severity",
235
+ "RAT",
236
+ "site_code",
237
+ "City",
238
+ "KPI",
239
+ "rca_group",
240
+ "rca_tags",
241
+ "rca_hint",
242
+ "status",
243
+ "impacted_rats",
244
+ "baseline_median",
245
+ "recent_median",
246
+ "bad_days_recent",
247
+ "max_streak_recent",
248
+ ]
249
+ if c in top.columns
250
+ ]
251
+ top = top[top_cols].head(300)
252
+
253
+ return out, top
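The criticality score weights persistent degradations most heavily; note that `degraded_kpis_total` already counts the persistent KPIs. A worked instance of the formula above:

```python
# criticality = persistent*5 + degraded_total*2 + impacted_rats*1 + resolved_total*0.5
persistent, degraded_total, impacted_rats, resolved_total = 2, 5, 3, 2
score = persistent * 5.0 + degraded_total * 2.0 + impacted_rats * 1.0 + resolved_total * 0.5
print(round(score))  # 24
```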
process_kpi/kpi_health_check/normalization.py ADDED
@@ -0,0 +1,292 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from utils.utils_vars import get_physical_db
7
+
8
+
9
+ def to_numeric(series: pd.Series) -> pd.Series:
10
+ if pd.api.types.is_numeric_dtype(series):
11
+ return pd.to_numeric(series, errors="coerce")
12
+ s = series.astype(str)
13
+ s = s.str.replace("\u00a0", "", regex=False)
14
+ s = s.str.replace(" ", "", regex=False)
15
+ s = s.str.replace("%", "", regex=False)
16
+ s = s.replace({"nan": np.nan, "None": np.nan, "": np.nan})
17
+
18
+ has_comma = s.str.contains(",", na=False, regex=False)
19
+ has_dot = s.str.contains(".", na=False, regex=False)
20
+ both = has_comma & has_dot
21
+ if bool(both.any()):
22
+ last_comma = s.str.rfind(",")
23
+ last_dot = s.str.rfind(".")
24
+ euro = both & (last_comma > last_dot)
25
+ us = both & (last_dot > last_comma)
26
+ if bool(euro.any()):
27
+ s.loc[euro] = (
28
+ s.loc[euro]
29
+ .str.replace(".", "", regex=False)
30
+ .str.replace(",", ".", regex=False)
31
+ )
32
+ if bool(us.any()):
33
+ s.loc[us] = s.loc[us].str.replace(",", "", regex=False)
34
+
35
+ comma_only = has_comma & ~has_dot
36
+ if bool(comma_only.any()):
37
+ s.loc[comma_only] = s.loc[comma_only].str.replace(",", ".", regex=False)
38
+ return pd.to_numeric(s, errors="coerce")
39
+
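`to_numeric` disambiguates European vs US thousands/decimal separators by the position of the last comma and dot. A quick check, assuming the repo root is on PYTHONPATH:

```python
import pandas as pd
from process_kpi.kpi_health_check.normalization import to_numeric

s = pd.Series(["1.234,56", "1,234.56", "98,7", "12 345", "95%"])
print(to_numeric(s).tolist())
# [1234.56, 1234.56, 98.7, 12345.0, 95.0]
```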
40
+
41
+ def parse_datetime(series: pd.Series) -> pd.Series:
42
+ if series.empty:
43
+ return pd.to_datetime(series, errors="coerce")
44
+ first = series.dropna().astype(str).iloc[0] if not series.dropna().empty else ""
45
+
46
+ formats: list[str] = []
47
+ if len(first) > 10:
48
+ formats.extend(
49
+ [
50
+ "%m.%d.%Y %H:%M:%S",
51
+ "%d.%m.%Y %H:%M:%S",
52
+ "%Y-%m-%d %H:%M:%S",
53
+ "%Y/%m/%d %H:%M:%S",
54
+ "%d/%m/%Y %H:%M:%S",
55
+ "%m/%d/%Y %H:%M:%S",
56
+ ]
57
+ )
58
+ formats.extend(
59
+ [
60
+ "%m.%d.%Y",
61
+ "%d.%m.%Y",
62
+ "%Y-%m-%d",
63
+ "%Y/%m/%d",
64
+ "%d/%m/%Y",
65
+ "%m/%d/%Y",
66
+ ]
67
+ )
68
+
69
+ for fmt in formats:
70
+ dt = pd.to_datetime(series, errors="coerce", format=fmt)
71
+ if dt.notna().any():
72
+ return dt
73
+
74
+ return pd.to_datetime(series, errors="coerce")
75
+
76
+
77
+ def extract_site_code(value: object) -> int | None:
78
+ if value is None or (isinstance(value, float) and np.isnan(value)):
79
+ return None
80
+ s = str(value)
81
+
82
+ # Prefer explicit node patterns when present (DN strings often contain multiple numbers).
83
+ for pat in [
84
+ r"(?:WBTS)\D*(\d{3,7})",
85
+ r"(?:LNBTS)\D*(\d{3,7})",
86
+ r"(?:BTS)\D*(\d{3,7})",
87
+ r"(?:BCF)\D*(\d{3,7})",
88
+ r"(?:MRBTS)\D*(\d{3,7})",
89
+ r"(?:SBTS)\D*(\d{3,7})",
90
+ ]:
91
+ m = re.search(pat, s, flags=re.IGNORECASE)
92
+ if m:
93
+ try:
94
+ return int(m.group(1))
95
+ except ValueError:
96
+ pass
97
+
98
+ # Fallback: accept 3-digit sites (common) while keeping the upper bound used previously.
99
+ m = re.search(r"(\d{3,7})", s)
100
+ if not m:
101
+ return None
102
+ try:
103
+ return int(m.group(1))
104
+ except ValueError:
105
+ return None
106
+
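`extract_site_code` prefers explicit node tokens, so DN strings containing several numbers resolve to the BTS id rather than the first match (sample DNs are illustrative):

```python
from process_kpi.kpi_health_check.normalization import extract_site_code

print(extract_site_code("PLMN-PLMN/RNC-12/WBTS-4711"))  # 4711 (WBTS token wins over RNC-12)
print(extract_site_code("Site_305_sector2"))            # 305 (generic 3-7 digit fallback)
```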
107
+
108
+ def infer_date_col(df: pd.DataFrame) -> str:
109
+ for c in ["PERIOD_START_TIME", "PERIOD_START_DATE", "date", "Date", "DATE"]:
110
+ if c in df.columns:
111
+ return c
112
+ raise ValueError("Cannot find a date column (expected PERIOD_START_TIME)")
113
+
114
+
115
+ def infer_id_col(df: pd.DataFrame, rat: str) -> str:
116
+ rat_candidates = {
117
+ "2G": ["BCF name", "BCF", "BTS name", "BSC name", "DN"],
118
+ "3G": ["WBTS name", "WBTS ID", "DN"],
119
+ "LTE": ["LNBTS name", "MRBTS/SBTS name", "DN"],
120
+ "TWAMP": ["MRBTS name", "MRBTS/SBTS name", "LNBTS name", "DN"],
121
+ }
122
+
123
+ candidates = [c for c in rat_candidates.get(rat, []) if c in df.columns]
124
+ if not candidates and "DN" in df.columns:
125
+ candidates = ["DN"]
126
+ if not candidates:
127
+ raise ValueError(f"Cannot infer an entity/site column for {rat} dataset")
128
+
129
+ physical_codes: set[int] | None = None
130
+ try:
131
+ physical = load_physical_db()
132
+ if not physical.empty and "code" in physical.columns:
133
+ physical_codes = set(
134
+ pd.to_numeric(physical["code"], errors="coerce")
135
+ .dropna()
136
+ .astype(int)
137
+ .tolist()
138
+ )
139
+ except Exception:
140
+ physical_codes = None
141
+
142
+ if not physical_codes:
143
+ return candidates[0]
144
+
145
+ best_col = candidates[0]
146
+ best_score = -1.0
147
+ for c in candidates:
148
+ sample = df[c].head(2000)
149
+ codes = sample.apply(extract_site_code)
150
+ non_null = float(codes.notna().mean()) if len(codes) else 0.0
151
+
152
+ if physical_codes:
153
+ match = (
154
+ float(codes.dropna().astype(int).isin(physical_codes).mean())
155
+ if codes.notna().any()
156
+ else 0.0
157
+ )
158
+ score = match * 10.0 + non_null
159
+ else:
160
+ score = non_null
161
+
162
+ if score > best_score:
163
+ best_score = score
164
+ best_col = c
165
+
166
+ return best_col
167
+
168
+
169
+ def non_kpi_identifier_cols(df: pd.DataFrame, rat: str) -> set[str]:
170
+ common = {
171
+ "DN",
172
+ "PLMN name",
173
+ "RNC name",
174
+ "BSC name",
175
+ "BCF name",
176
+ "MRBTS name",
177
+ "MRBTS/SBTS name",
178
+ "LNBTS name",
179
+ "WBTS name",
180
+ "WBTS ID",
181
+ }
182
+ rat_specific = {
183
+ "2G": {"BSC name", "BSC", "BCF name", "BCF", "BTS name"},
184
+ "3G": {"PLMN name", "RNC name", "WBTS name", "WBTS ID"},
185
+ "LTE": {"MRBTS/SBTS name", "LNBTS name"},
186
+ }
187
+ cols = set()
188
+ for c in common.union(rat_specific.get(rat, set())):
189
+ if c in df.columns:
190
+ cols.add(c)
191
+ return cols
192
+
193
+
194
+ def infer_agg(kpi: str) -> str:
195
+ k = str(kpi).lower()
196
+ if any(x in k for x in ["traffic", "volume", "erl", "total", "gbytes", "gb"]):
197
+ return "sum"
198
+ return "mean"
199
+
200
+
201
+ def _is_availability_kpi(kpi: str) -> bool:
202
+ k = str(kpi).strip().lower()
203
+ if not k:
204
+ return False
205
+ return any(
206
+ x in k
207
+ for x in [
208
+ "availability",
209
+ "avail",
210
+ "unavailability",
211
+ "unavail",
212
+ "dispo",
213
+ "disponibil",
214
+ "uptime",
215
+ ]
216
+ )
217
+
218
+
219
+ def load_physical_db() -> pd.DataFrame:
220
+ physical_db = get_physical_db().copy()
221
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
222
+ physical_db["code"] = pd.to_numeric(physical_db["code"], errors="coerce")
223
+ physical_db = physical_db.dropna(subset=["code"])
224
+ physical_db["code"] = physical_db["code"].astype(int)
225
+ keep = [
226
+ c for c in ["code", "Longitude", "Latitude", "City"] if c in physical_db.columns
227
+ ]
228
+ return physical_db[keep].drop_duplicates("code")
229
+
230
+
231
+ def build_period_kpi(
232
+ df_raw: pd.DataFrame,
233
+ rat: str,
234
+ granularity: str = "Daily",
235
+ ) -> tuple[pd.DataFrame, list[str]]:
236
+ df = df_raw.copy()
237
+ date_col = infer_date_col(df)
238
+ id_col = infer_id_col(df, rat)
239
+
240
+ df["date"] = parse_datetime(df[date_col])
241
+ df = df.dropna(subset=["date"])
242
+
243
+ g = str(granularity or "Daily").strip().lower()
244
+ if g.startswith("hour") or g.startswith("h"):
245
+ df["period_start"] = df["date"].dt.floor("h")
246
+ else:
247
+ df["period_start"] = df["date"].dt.floor("D")
248
+
249
+ df["site_code"] = df[id_col].apply(extract_site_code)
250
+ df = df.dropna(subset=["site_code"])
251
+ df["site_code"] = df["site_code"].astype(int)
252
+
253
+ meta = {date_col, id_col, "date", "site_code", "period_start"}
254
+ meta = meta.union(non_kpi_identifier_cols(df, rat))
255
+ candidate_cols = [c for c in df.columns if c not in meta]
256
+
257
+ numeric_cols: dict[str, pd.Series] = {}
258
+ for c in candidate_cols:
259
+ numeric_cols[c] = to_numeric(df[c])
260
+
261
+ numeric_df = pd.DataFrame(numeric_cols)
262
+ for c in list(numeric_df.columns):
263
+ if _is_availability_kpi(c):
264
+ numeric_df[c] = numeric_df[c].fillna(0.0)
265
+ kpi_cols = [c for c in numeric_df.columns if numeric_df[c].notna().any()]
266
+ if not kpi_cols:
267
+ raise ValueError(f"No numeric KPI columns detected for {rat}")
268
+
269
+ base = pd.concat(
270
+ [
271
+ df[["site_code", "period_start"]].reset_index(drop=True),
272
+ numeric_df[kpi_cols].reset_index(drop=True),
273
+ ],
274
+ axis=1,
275
+ )
276
+
277
+ agg_dict = {k: infer_agg(k) for k in kpi_cols}
278
+ out = base.groupby(["site_code", "period_start"], as_index=False).agg(agg_dict)
279
+ out["date_only"] = pd.to_datetime(out["period_start"]).dt.date
280
+
281
+ physical = load_physical_db()
282
+ if not physical.empty:
283
+ out = pd.merge(out, physical, left_on="site_code", right_on="code", how="left")
284
+ out = out.drop(columns=[c for c in ["code"] if c in out.columns])
285
+
286
+ out["RAT"] = rat
287
+
288
+ return out, kpi_cols
289
+
290
+
291
+ def build_daily_kpi(df_raw: pd.DataFrame, rat: str) -> tuple[pd.DataFrame, list[str]]:
292
+ return build_period_kpi(df_raw, rat, granularity="Daily")
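A minimal end-to-end sketch of the period aggregation above (column names follow the 3G candidates in `infer_id_col`; the coordinate merge assumes the bundled physical DB loads):

import pandas as pd

raw = pd.DataFrame(
    {
        "PERIOD_START_TIME": ["01.15.2024 08:00:00", "01.15.2024 09:00:00"],
        "WBTS name": ["12345_CITY_A", "12345_CITY_A"],
        "Total Traffic (Erl)": ["10,5", "12,0"],
        "Cell Availability": ["99,9", None],
    }
)
daily, kpi_cols = build_daily_kpi(raw, rat="3G")
# One row for site 12345 on 2024-01-15: the traffic column is summed
# (its name matches infer_agg's "traffic" tokens, giving 22.5), while
# availability is averaged after its NaN is filled with 0.0.
print(daily[["site_code", "period_start"] + kpi_cols])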
process_kpi/kpi_health_check/presets.py ADDED
@@ -0,0 +1,79 @@
1
+ import json
2
+ import os
3
+ from datetime import datetime, timezone
4
+
5
+ import pandas as pd
6
+
7
+
8
+ def presets_dir() -> str:
9
+ root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+ return os.path.join(root, "data", "kpi_health_check_presets")
11
+
12
+
13
+ def _safe_name(name: str) -> str:
14
+ s = (name or "").strip()
15
+ s = s.replace("..", "")
16
+ s = s.replace("/", "_").replace("\\", "_")
17
+ s = "_".join([p for p in s.split() if p])
18
+ return s
19
+
20
+
21
+ def list_presets() -> list[str]:
22
+ d = presets_dir()
23
+ if not os.path.isdir(d):
24
+ return []
25
+ out = []
26
+ for fn in os.listdir(d):
27
+ if fn.lower().endswith(".json"):
28
+ out.append(os.path.splitext(fn)[0])
29
+ return sorted(set(out))
30
+
31
+
32
+ def load_preset(name: str) -> pd.DataFrame:
33
+ d = presets_dir()
34
+ safe = _safe_name(name)
35
+ path = os.path.join(d, f"{safe}.json")
36
+ with open(path, "r", encoding="utf-8") as f:
37
+ obj = json.load(f)
38
+ rows = obj.get("rules", []) if isinstance(obj, dict) else []
39
+ df = pd.DataFrame(rows)
40
+ if not df.empty:
41
+ df["RAT"] = df["RAT"].astype(str)
42
+ df["KPI"] = df["KPI"].astype(str)
43
+ return df
44
+
45
+
46
+ def save_preset(name: str, rules_df: pd.DataFrame) -> str:
47
+ safe = _safe_name(name)
48
+ if not safe:
49
+ raise ValueError("Preset name is empty")
50
+
51
+ d = presets_dir()
52
+ os.makedirs(d, exist_ok=True)
53
+ path = os.path.join(d, f"{safe}.json")
54
+
55
+ df = rules_df.copy() if isinstance(rules_df, pd.DataFrame) else pd.DataFrame()
56
+ if df.empty:
57
+ raise ValueError("Rules dataframe is empty")
58
+
59
+ keep = [c for c in ["RAT", "KPI", "direction", "sla", "policy"] if c in df.columns]
60
+ df = df[keep].copy()
61
+
62
+ obj = {
63
+ "name": safe,
64
+ "saved_at": datetime.utcnow().isoformat() + "Z",
65
+ "rules": df.to_dict(orient="records"),
66
+ }
67
+
68
+ with open(path, "w", encoding="utf-8") as f:
69
+ json.dump(obj, f, ensure_ascii=False, indent=2)
70
+
71
+ return path
72
+
73
+
74
+ def delete_preset(name: str) -> None:
75
+ d = presets_dir()
76
+ safe = _safe_name(name)
77
+ path = os.path.join(d, f"{safe}.json")
78
+ if os.path.isfile(path):
79
+ os.remove(path)
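A round-trip sketch of the preset helpers (the preset name is invented; note how `_safe_name` rewrites it):

import pandas as pd

rules = pd.DataFrame(
    [{"RAT": "LTE", "KPI": "Cell Avail excl BLU",
      "direction": "higher_is_better", "sla": 98.0, "policy": "enforce"}]
)
save_preset("My Rules / v1", rules)   # written as My_Rules___v1.json
print(list_presets())                 # includes "My_Rules___v1"
df = load_preset("My_Rules___v1")     # back to a rules DataFrame
delete_preset("My_Rules___v1")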
process_kpi/kpi_health_check/profiles.py ADDED
@@ -0,0 +1,71 @@
1
+ import json
2
+ import os
3
+ from datetime import datetime, timezone
4
+
5
+
6
+ def profiles_dir() -> str:
7
+ root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ return os.path.join(root, "data", "kpi_health_check_profiles")
9
+
10
+
11
+ def _safe_name(name: str) -> str:
12
+ s = (name or "").strip()
13
+ s = s.replace("..", "")
14
+ s = s.replace("/", "_").replace("\\", "_")
15
+ s = "_".join([p for p in s.split() if p])
16
+ return s
17
+
18
+
19
+ def list_profiles() -> list[str]:
20
+ d = profiles_dir()
21
+ if not os.path.isdir(d):
22
+ return []
23
+ out: list[str] = []
24
+ for fn in os.listdir(d):
25
+ if fn.lower().endswith(".json"):
26
+ out.append(os.path.splitext(fn)[0])
27
+ return sorted(set(out))
28
+
29
+
30
+ def load_profile(name: str) -> dict:
31
+ d = profiles_dir()
32
+ safe = _safe_name(name)
33
+ path = os.path.join(d, f"{safe}.json")
34
+ with open(path, "r", encoding="utf-8") as f:
35
+ obj = json.load(f)
36
+ if isinstance(obj, dict) and "config" in obj and isinstance(obj["config"], dict):
37
+ return obj["config"]
38
+ if isinstance(obj, dict):
39
+ return obj
40
+ return {}
41
+
42
+
43
+ def save_profile(name: str, config: dict) -> str:
44
+ safe = _safe_name(name)
45
+ if not safe:
46
+ raise ValueError("Profile name is empty")
47
+ if config is None or not isinstance(config, dict) or not config:
48
+ raise ValueError("Profile config is empty")
49
+
50
+ d = profiles_dir()
51
+ os.makedirs(d, exist_ok=True)
52
+ path = os.path.join(d, f"{safe}.json")
53
+
54
+ obj = {
55
+ "name": safe,
56
+ "saved_at": datetime.utcnow().isoformat() + "Z",
57
+ "config": config,
58
+ }
59
+
60
+ with open(path, "w", encoding="utf-8") as f:
61
+ json.dump(obj, f, ensure_ascii=False, indent=2)
62
+
63
+ return path
64
+
65
+
66
+ def delete_profile(name: str) -> None:
67
+ d = profiles_dir()
68
+ safe = _safe_name(name)
69
+ path = os.path.join(d, f"{safe}.json")
70
+ if os.path.isfile(path):
71
+ os.remove(path)
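Usage mirrors the preset helpers; the only wrinkle is that `load_profile` unwraps the `config` envelope written by `save_profile` but also accepts a bare dict saved by hand. The keys below are illustrative:

cfg = {"granularity": "Daily", "rats": ["2G", "3G", "LTE"]}
save_profile("Profil 1", cfg)          # stored as Profil_1.json
assert load_profile("Profil_1") == cfg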
process_kpi/kpi_health_check/rules.py ADDED
@@ -0,0 +1,132 @@
1
+ import json
2
+ import os
3
+ import re
4
+ from functools import lru_cache
5
+
6
+
7
+ def _norm(value: str) -> str:
8
+ s = str(value or "").strip().lower()
9
+ s = re.sub(r"[^0-9a-z]+", " ", s)
10
+ s = re.sub(r"\s+", " ", s).strip()
11
+ return s
12
+
13
+
14
+ def _project_root() -> str:
15
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
16
+
17
+
18
+ def _load_curated_rows() -> list[dict]:
19
+ base_dir = os.path.join(_project_root(), "data", "kpi_health_check_presets")
20
+ candidates = [
21
+ os.path.join(base_dir, "presets_1.json"),
22
+ os.path.join(base_dir, "profil_1.json"),
23
+ ]
24
+
25
+ path = next((p for p in candidates if os.path.exists(p)), None)
26
+ if not path:
27
+ return []
28
+
29
+ try:
30
+ with open(path, "r", encoding="utf-8") as f:
31
+ txt = f.read()
32
+ txt = re.sub(r"\bNaN\b", "null", txt)  # bare NaN (e.g. from json.dump of float("nan")) is not valid JSON
33
+ obj = json.loads(txt)
34
+ rows = obj.get("rules", []) if isinstance(obj, dict) else []
35
+ return rows if isinstance(rows, list) else []
36
+ except Exception: # noqa: BLE001
37
+ return []
38
+
39
+
40
+ @lru_cache(maxsize=1)
41
+ def _curated_rules_map() -> dict[tuple[str, str], dict]:
42
+ out: dict[tuple[str, str], dict] = {}
43
+ for r in _load_curated_rows():
44
+ if not isinstance(r, dict):
45
+ continue
46
+ rat = _norm(r.get("RAT"))
47
+ kpi = _norm(r.get("KPI"))
48
+ if not rat or not kpi:
49
+ continue
50
+
51
+ direction = str(r.get("direction") or "").strip()
52
+ policy_raw = str(r.get("policy") or "").strip().lower()
53
+ policy = policy_raw if policy_raw in {"enforce", "notify"} else None
54
+ sla_raw = r.get("sla", None)
55
+ try:
56
+ sla = float(sla_raw) if sla_raw is not None else None
57
+ except Exception: # noqa: BLE001
58
+ sla = None
59
+
60
+ out[(rat, kpi)] = {
61
+ "direction": direction or None,
62
+ "sla": sla,
63
+ "policy": policy,
64
+ }
65
+ return out
66
+
67
+
68
+ def _curated_rule(kpi: str, rat: str | None = None) -> dict | None:
69
+ if not kpi or not rat:
70
+ return None
71
+ key = (_norm(rat), _norm(kpi))
72
+ return _curated_rules_map().get(key)
73
+
74
+
75
+ def infer_kpi_direction(kpi: str, rat: str | None = None) -> str:
76
+ curated = _curated_rule(kpi, rat)
77
+ if curated and curated.get("direction"):
78
+ return str(curated["direction"])
79
+
80
+ k = _norm(kpi)
81
+ if _norm(rat) == "twamp" and any(x in k for x in ["lost", "loss"]):
82
+ return "lower_is_better"
83
+ lower_is_better = [
84
+ "drop",
85
+ "dcr",
86
+ "blocking",
87
+ "block",
88
+ "congestion",
89
+ "loss",
90
+ "lost",
91
+ "discard",
92
+ "rtwp",
93
+ "prb usage",
94
+ "usage",
95
+ "fail",
96
+ ]
97
+ if any(x in k for x in lower_is_better):
98
+ return "lower_is_better"
99
+ return "higher_is_better"
100
+
101
+
102
+ def infer_kpi_sla(kpi: str, direction: str, rat: str | None = None) -> float | None:
103
+ curated = _curated_rule(kpi, rat)
104
+ if curated and curated.get("sla") is not None:
105
+ try:
106
+ return float(curated["sla"])
107
+ except Exception: # noqa: BLE001
108
+ pass
109
+
110
+ k = _norm(kpi)
111
+ if _norm(rat) == "twamp" and any(x in k for x in ["lost", "loss"]):
112
+ return 1000.0
113
+ if direction == "higher_is_better" and any(
114
+ x in k for x in ["availability", "cssr", "success", " sr"]
115
+ ):
116
+ return 98.0
117
+ if direction == "lower_is_better" and any(
118
+ x in k for x in ["drop", "dcr", "blocking", "congestion", "loss", "discard"]
119
+ ):
120
+ return 2.0
121
+ return None
122
+
123
+
124
+ def infer_kpi_policy(kpi: str, rat: str | None = None) -> str:
125
+ curated = _curated_rule(kpi, rat)
126
+ if curated and curated.get("policy"):
127
+ return str(curated["policy"])
128
+
129
+ k = _norm(kpi)
130
+ if "distance" in k:
131
+ return "notify"
132
+ return "enforce"
process_kpi/lte_kpi_requirements.md ADDED
@@ -0,0 +1,46 @@
1
+ # LTE CAPACITY REPORT
2
+
3
+ Based on the GSM and WCDMA examples, let's build the LTE capacity report.
4
+
5
+ ## Required Input
6
+
7
+ - File: LTE BH report with columns:
8
+ - PERIOD_START_TIME
9
+ - MRBTS/SBTS name
10
+ - LNBTS name
11
+ - LNCEL name
12
+ - DN
13
+ - Cell Avail excl BLU
14
+ - E-UTRAN Avg PRB usage per TTI DL
15
+ - Number of last days for the analysis
16
+ - Number of days for threshold
17
+ - Availability threshold
18
+ - PRB usage per TTI DL threshold
19
+ - Max difference between PRB usage over cells of the same BTS
20
+
21
+ ### TASK
22
+
23
+ - Pivot KPI in BH report per KPI (Cell Avail excl BLU, E-UTRAN Avg PRB usage per TTI DL)
24
+ - Calculate Average and Max of PRB usage per TTI DL
25
+ - Calculate Average and Max of Cell Avail excl BLU
26
+ - Count number of days with Cell Avail excl BLU below the Availability threshold
27
+ - Count number of days where PRB usage per TTI DL exceeded the PRB usage per TTI DL threshold
28
+ - Create separate DF per sector and band based on LNCEL name
29
+ - _1_L800: column_name = Sector_1_L800
30
+ - _2_L800: column_name = Sector_2_L800
31
+ - _3_L800: column_name = Sector_3_L800
32
+ - _1_L1800: column_name = Sector_1_L1800
33
+ - _2_L1800: column_name = Sector_2_L1800
34
+ - _3_L1800: column_name = Sector_3_L1800
35
+ - _1_L2300: column_name = Sector_1_L2300
36
+ - _2_L2300: column_name = Sector_2_L2300
37
+ - _3_L2300: column_name = Sector_3_L2300
38
+ - _1_L2600: column_name = Sector_1_L2600
39
+ - _2_L2600: column_name = Sector_2_L2600
40
+ - _3_L2600: column_name = Sector_3_L2600
41
+ - _1S_L1800: column_name = Sector_1S_L1800
42
+ - _2S_L1800: column_name = Sector_2S_L1800
43
+ - _3S_L1800: column_name = Sector_3S_L1800
44
+ - Merge DFs per sector on LNBTS name
45
+ - Concat DFs per band
46
+
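As a non-authoritative sketch of the sector/band split requested above (suffix list truncated; `split_by_suffix` is a hypothetical helper name, and it assumes the band token terminates the LNCEL name):

import pandas as pd

SUFFIX_TO_COLUMN = {
    "_1_L800": "Sector_1_L800",
    "_2_L800": "Sector_2_L800",
    "_1S_L1800": "Sector_1S_L1800",
    # ... remaining suffixes follow the list above
}

def split_by_suffix(df: pd.DataFrame) -> dict[str, pd.DataFrame]:
    # One DataFrame per sector/band, keyed by its target column name
    return {
        col: df[df["LNCEL name"].str.endswith(suffix)].copy()
        for suffix, col in SUFFIX_TO_COLUMN.items()
    }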
process_kpi/process_gsm_capacity.py ADDED
@@ -0,0 +1,719 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from queries.process_gsm import combined_gsm_database
5
+ from utils.check_sheet_exist import execute_checks_sheets_exist
6
+ from utils.convert_to_excel import convert_dfs, save_dataframe
7
+ from utils.kpi_analysis_utils import (
8
+ GsmAnalysis,
9
+ GsmCapacity,
10
+ analyze_sdcch_call_blocking,
11
+ analyze_tch_abis_fails,
12
+ analyze_tch_call_blocking,
13
+ cell_availability_analysis,
14
+ combine_comments,
15
+ create_daily_date,
16
+ create_dfs_per_kpi,
17
+ create_hourly_date,
18
+ kpi_naming_cleaning,
19
+ )
20
+ from utils.utils_functions import calculate_distances
21
+
22
+ GSM_ANALYSIS_COLUMNS = [
23
+ "ID_BTS",
24
+ "site_name",
25
+ "name",
26
+ "BSC",
27
+ "BCF",
28
+ "BTS",
29
+ "code",
30
+ "Region",
31
+ "adminState",
32
+ "frequencyBandInUse",
33
+ "cellId",
34
+ "band",
35
+ "site_config_band",
36
+ "trxRfPower",
37
+ "BCCH",
38
+ "Longitude",
39
+ "Latitude",
40
+ "TRX_TCH",
41
+ "MAL_TCH",
42
+ "amrSegLoadDepTchRateLower",
43
+ "amrSegLoadDepTchRateUpper",
44
+ "btsSpLoadDepTchRateLower",
45
+ "btsSpLoadDepTchRateUpper",
46
+ "amrWbFrCodecModeSet",
47
+ "dedicatedGPRScapacity",
48
+ "defaultGPRScapacity",
49
+ "number_trx_per_cell",
50
+ "number_trx_per_bcf",
51
+ "number_tch_per_cell",
52
+ "number_sd_per_cell",
53
+ "number_bcch_per_cell",
54
+ "number_ccch_per_cell",
55
+ "number_cbc_per_cell",
56
+ "number_total_channels_per_cell",
57
+ "number_signals_per_cell",
58
+ "hf_rate_coef",
59
+ "GPRS",
60
+ "TCH Actual HR%",
61
+ "Offered Traffic BH",
62
+ "Max_Traffic BH",
63
+ "Avg_Traffic BH",
64
+ "TCH UTILIZATION (@Max Traffic)",
65
+ "Tch utilization comments",
66
+ "ErlabngB_value",
67
+ "Target FR CHs",
68
+ "Target HR CHs",
69
+ "Target TCHs",
70
+ "Target TRXs",
71
+ "Number of required TRXs",
72
+ "max_tch_call_blocking_bh",
73
+ "avg_tch_call_blocking_bh",
74
+ "number_of_days_with_tch_blocking_exceeded_bh",
75
+ "tch_call_blocking_bh_comment",
76
+ "max_sdcch_real_blocking_bh",
77
+ "avg_sdcch_real_blocking_bh",
78
+ "number_of_days_with_sdcch_blocking_exceeded_bh",
79
+ "sdcch_real_blocking_bh_comment",
80
+ "Average_cell_availability_bh",
81
+ "number_of_days_exceeding_availability_threshold_bh",
82
+ "availability_comment_bh",
83
+ "max_tch_abis_fail_bh",
84
+ "avg_tch_abis_fail_bh",
85
+ "number_of_days_with_tch_abis_fail_exceeded_bh",
86
+ "tch_abis_fail_bh_comment",
87
+ "Average_cell_availability_daily",
88
+ "number_of_days_exceeding_availability_threshold_daily",
89
+ "availability_comment_daily",
90
+ "max_tch_abis_fail_daily",
91
+ "avg_tch_abis_fail_daily",
92
+ "number_of_days_with_tch_abis_fail_exceeded_daily",
93
+ "tch_abis_fail_daily_comment",
94
+ "BH Congestion status",
95
+ "operational_comment",
96
+ "Final comment",
97
+ "Final comment summary",
98
+ ]
99
+
100
+ OPERATIONAL_NEIGHBOURS_COLUMNS = [
101
+ "ID_BTS",
102
+ "name",
103
+ "operational_comment",
104
+ "BH Congestion status",
105
+ "Longitude",
106
+ "Latitude",
107
+ ]
108
+
109
+ GSM_COLUMNS = [
110
+ "ID_BTS",
111
+ "site_name",
112
+ "name",
113
+ "BSC",
114
+ "BCF",
115
+ "BTS",
116
+ "code",
117
+ "Region",
118
+ "adminState",
119
+ "frequencyBandInUse",
120
+ "amrSegLoadDepTchRateLower",
121
+ "amrSegLoadDepTchRateUpper",
122
+ "btsSpLoadDepTchRateLower",
123
+ "btsSpLoadDepTchRateUpper",
124
+ "amrWbFrCodecModeSet",
125
+ "dedicatedGPRScapacity",
126
+ "defaultGPRScapacity",
127
+ "cellId",
128
+ "band",
129
+ "site_config_band",
130
+ "trxRfPower",
131
+ "BCCH",
132
+ "number_trx_per_cell",
133
+ "number_trx_per_bcf",
134
+ "TRX_TCH",
135
+ "MAL_TCH",
136
+ "Longitude",
137
+ "Latitude",
138
+ ]
139
+
140
+ TRX_COLUMNS = [
141
+ "ID_BTS",
142
+ "number_tch_per_cell",
143
+ "number_sd_per_cell",
144
+ "number_bcch_per_cell",
145
+ "number_ccch_per_cell",
146
+ "number_cbc_per_cell",
147
+ "number_total_channels_per_cell",
148
+ "number_signals_per_cell",
149
+ ]
150
+
151
+ KPI_COLUMNS = [
152
+ "date",
153
+ "BTS_name",
154
+ "TCH_availability_ratio",
155
+ "2G_Carried_Traffic",
156
+ "TCH_call_blocking",
157
+ "TCH_ABIS_FAIL_CALL_c001084",
158
+ "SDCCH_real_blocking",
159
+ ]
160
+ BH_COLUMNS_FOR_CAPACITY = [
161
+ "Max_Traffic BH",
162
+ "Avg_Traffic BH",
163
+ "max_tch_call_blocking_bh",
164
+ "avg_tch_call_blocking_bh",
165
+ "number_of_days_with_tch_blocking_exceeded_bh",
166
+ "tch_call_blocking_bh_comment",
167
+ "max_sdcch_real_blocking_bh",
168
+ "avg_sdcch_real_blocking_bh",
169
+ "number_of_days_with_sdcch_blocking_exceeded_bh",
170
+ "sdcch_real_blocking_bh_comment",
171
+ "Average_cell_availability_bh",
172
+ "number_of_days_exceeding_availability_threshold_bh",
173
+ "availability_comment_bh",
174
+ "max_tch_abis_fail_bh",
175
+ "avg_tch_abis_fail_bh",
176
+ "number_of_days_with_tch_abis_fail_exceeded_bh",
177
+ "tch_abis_fail_bh_comment",
178
+ ]
179
+
180
+ DAILY_COLUMNS_FOR_CAPACITY = [
181
+ "Average_cell_availability_daily",
182
+ "number_of_days_exceeding_availability_threshold_daily",
183
+ "availability_comment_daily",
184
+ "max_tch_abis_fail_daily",
185
+ "avg_tch_abis_fail_daily",
186
+ "number_of_days_with_tch_abis_fail_exceeded_daily",
187
+ "tch_abis_fail_daily_comment",
188
+ ]
189
+
190
+
191
+ def bh_traffic_analysis(
192
+ df: pd.DataFrame,
193
+ number_of_kpi_days: int,
194
+ ) -> pd.DataFrame:
195
+
196
+ result_df = df.copy()
197
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
198
+ # last_days_df = last_days_df.fillna(0)
199
+
200
+ result_df["Avg_Traffic BH"] = last_days_df.mean(axis=1).round(2)
201
+ result_df["Max_Traffic BH"] = last_days_df.max(axis=1)
202
+ return result_df
203
+
204
+
205
+ def bh_dfs_per_kpi(
206
+ df: pd.DataFrame,
207
+ number_of_kpi_days: int = 7,
208
+ tch_blocking_threshold: float = 0.50,
209
+ sdcch_blocking_threshold: float = 0.50,
210
+ number_of_threshold_days: int = 3,
211
+ tch_abis_fails_threshold: int = 10,
212
+ availability_threshold: int = 95,
213
+ ) -> pd.DataFrame:
214
+ """
215
+ Create pivoted DataFrames for each KPI and perform analysis.
216
+
217
+ Args:
218
+ df: DataFrame containing KPI data
219
+ number_of_kpi_days: Number of days to analyze
220
+ *_threshold: TCH/SDCCH blocking, Abis-fail and availability limits used for flagging
221
+ number_of_threshold_days: Minimum days above threshold to flag for upgrade
222
+
223
+ Returns:
224
+ DataFrame with combined analysis results
225
+ """
226
+ pivoted_kpi_dfs = {}
227
+
228
+ pivoted_kpi_dfs = create_dfs_per_kpi(
229
+ df=df,
230
+ pivot_date_column="date",
231
+ pivot_name_column="BTS_name",
232
+ kpi_columns_from=2,
233
+ )
234
+
235
+ tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
236
+ sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
237
+ Carried_Traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
238
+ tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
239
+ tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
240
+
241
+ # ANALYSIS
242
+
243
+ tch_call_blocking_df = analyze_tch_call_blocking(
244
+ df=tch_call_blocking_df,
245
+ number_of_kpi_days=number_of_kpi_days,
246
+ number_of_threshold_days=number_of_threshold_days,
247
+ tch_blocking_threshold=tch_blocking_threshold,
248
+ analysis_type="BH",
249
+ )
250
+
251
+ sdcch_real_blocking_df = analyze_sdcch_call_blocking(
252
+ df=sdcch_real_blocking_df,
253
+ number_of_kpi_days=number_of_kpi_days,
254
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
255
+ number_of_threshold_days=number_of_threshold_days,
256
+ analysis_type="BH",
257
+ )
258
+
259
+ Carried_Traffic_df = bh_traffic_analysis(
260
+ df=Carried_Traffic_df,
261
+ number_of_kpi_days=number_of_kpi_days,
262
+ )
263
+
264
+ tch_abis_fails_df = analyze_tch_abis_fails(
265
+ df=tch_abis_fails_df,
266
+ number_of_kpi_days=number_of_kpi_days,
267
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
268
+ number_of_threshold_days=number_of_threshold_days,
269
+ analysis_type="BH",
270
+ )
271
+ tch_availability_ratio_df = cell_availability_analysis(
272
+ df=tch_availability_ratio_df,
273
+ days=number_of_kpi_days,
274
+ availability_threshold=availability_threshold,
275
+ analysis_type="BH",
276
+ )
277
+
278
+ bh_kpi_df = pd.concat(
279
+ [
280
+ Carried_Traffic_df,
281
+ tch_call_blocking_df,
282
+ sdcch_real_blocking_df,
283
+ tch_availability_ratio_df,
284
+ tch_abis_fails_df,
285
+ ],
286
+ axis=1,
287
+ )
288
+ return bh_kpi_df
289
+
290
+
291
+ def analyse_bh_data(
292
+ bh_report_path: str,
293
+ number_of_kpi_days: int,
294
+ tch_blocking_threshold: float,
295
+ sdcch_blocking_threshold: float,
296
+ number_of_threshold_days: int,
297
+ tch_abis_fails_threshold: int,
298
+ availability_threshold: int,
299
+ ) -> list[pd.DataFrame]:
300
+ df = pd.read_csv(bh_report_path, delimiter=";")
301
+ df = kpi_naming_cleaning(df)
302
+ df = create_hourly_date(df)
303
+ df = df[KPI_COLUMNS]
304
+ df = bh_dfs_per_kpi(
305
+ df=df,
306
+ number_of_kpi_days=number_of_kpi_days,
307
+ tch_blocking_threshold=tch_blocking_threshold,
308
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
309
+ number_of_threshold_days=number_of_threshold_days,
310
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
311
+ availability_threshold=availability_threshold,
312
+ )
313
+
314
+ bh_df_for_capacity = df.copy()
315
+ bh_df_for_capacity = bh_df_for_capacity[BH_COLUMNS_FOR_CAPACITY]
316
+ bh_df_for_capacity = bh_df_for_capacity.reset_index()
317
+
318
+ # If columns have multiple levels (MultiIndex), flatten them
319
+ if isinstance(bh_df_for_capacity.columns, pd.MultiIndex):
320
+ bh_df_for_capacity.columns = [
321
+ "_".join([str(el) for el in col if el])
322
+ for col in bh_df_for_capacity.columns.values
323
+ ]
324
+ # bh_df_for_capacity = bh_df_for_capacity.reset_index()
325
+
326
+ # rename Bts_name to name
327
+ bh_df_for_capacity = bh_df_for_capacity.rename(columns={"BTS_name": "name"})
328
+
329
+ return [bh_df_for_capacity, df]
330
+
331
+
332
+ def daily_dfs_per_kpi(
333
+ df: pd.DataFrame,
334
+ number_of_kpi_days: int = 7,
335
+ availability_threshold: int = 95,
336
+ number_of_threshold_days: int = 3,
337
+ tch_abis_fails_threshold: int = 10,
338
+ sdcch_blocking_threshold: float = 0.5,
339
+ tch_blocking_threshold: float = 0.5,
340
+ ) -> pd.DataFrame:
341
+ """
342
+ Create pivoted DataFrames for each KPI and perform analysis.
343
+
344
+ Args:
345
+ df: DataFrame containing KPI data
346
+ number_of_kpi_days: Number of days to analyze
347
+ *_threshold: availability, Abis-fail and TCH/SDCCH blocking limits used for flagging
348
+ number_of_threshold_days: Minimum days above threshold to flag for upgrade
349
+
350
+ Returns:
351
+ DataFrame with combined analysis results
352
+ """
353
+ pivoted_kpi_dfs = {}
354
+
355
+ pivoted_kpi_dfs = create_dfs_per_kpi(
356
+ df=df,
357
+ pivot_date_column="date",
358
+ pivot_name_column="BTS_name",
359
+ kpi_columns_from=2,
360
+ )
361
+
362
+ tch_call_blocking_df: pd.DataFrame = pivoted_kpi_dfs["TCH_call_blocking"]
363
+ sdcch_real_blocking_df: pd.DataFrame = pivoted_kpi_dfs["SDCCH_real_blocking"]
364
+ Carried_Traffic_df: pd.DataFrame = pivoted_kpi_dfs["2G_Carried_Traffic"]
365
+ tch_availability_ratio_df: pd.DataFrame = pivoted_kpi_dfs["TCH_availability_ratio"]
366
+ tch_abis_fails_df: pd.DataFrame = pivoted_kpi_dfs["TCH_ABIS_FAIL_CALL_c001084"]
367
+
368
+ tch_availability_ratio_df = cell_availability_analysis(
369
+ df=tch_availability_ratio_df,
370
+ days=number_of_kpi_days,
371
+ availability_threshold=availability_threshold,
372
+ )
373
+ sdcch_real_blocking_df = analyze_sdcch_call_blocking(
374
+ df=sdcch_real_blocking_df,
375
+ number_of_kpi_days=number_of_kpi_days,
376
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
377
+ number_of_threshold_days=number_of_threshold_days,
378
+ analysis_type="Daily",
379
+ )
380
+ tch_call_blocking_df = analyze_tch_call_blocking(
381
+ df=tch_call_blocking_df,
382
+ number_of_kpi_days=number_of_kpi_days,
383
+ number_of_threshold_days=number_of_threshold_days,
384
+ tch_blocking_threshold=tch_blocking_threshold,
385
+ analysis_type="Daily",
386
+ )
387
+ tch_abis_fails_df = analyze_tch_abis_fails(
388
+ df=tch_abis_fails_df,
389
+ number_of_kpi_days=number_of_kpi_days,
390
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
391
+ number_of_threshold_days=number_of_threshold_days,
392
+ analysis_type="Daily",
393
+ )
394
+
395
+ daily_kpi_df = pd.concat(
396
+ [
397
+ tch_availability_ratio_df,
398
+ Carried_Traffic_df,
399
+ tch_call_blocking_df,
400
+ sdcch_real_blocking_df,
401
+ tch_abis_fails_df,
402
+ ],
403
+ axis=1,
404
+ )
405
+
406
+ daily_kpi_df = combine_comments(
407
+ daily_kpi_df,
408
+ "availability_comment_daily",
409
+ "tch_abis_fail_daily_comment",
410
+ "sdcch_real_blocking_daily_comment",
411
+ new_column="sdcch_comments",
412
+ )
413
+
414
+ daily_kpi_df = combine_comments(
415
+ daily_kpi_df,
416
+ "availability_comment_daily",
417
+ "tch_abis_fail_daily_comment",
418
+ "tch_call_blocking_daily_comment",
419
+ new_column="tch_comments",
420
+ )
421
+ return daily_kpi_df
422
+
423
+
424
+ def analyse_daily_data(
425
+ daily_report_path: str,
426
+ number_of_kpi_days: int,
427
+ tch_abis_fails_threshold: int,
428
+ availability_threshold: int,
429
+ number_of_threshold_days: int,
430
+ sdcch_blocking_threshold: float,
431
+ tch_blocking_threshold: float,
432
+ ) -> tuple[pd.DataFrame, pd.DataFrame]:
433
+ df = pd.read_csv(daily_report_path, delimiter=";")
434
+ df = kpi_naming_cleaning(df)
435
+ df = create_daily_date(df)
436
+ df = df[KPI_COLUMNS]
437
+ df = daily_dfs_per_kpi(
438
+ df=df,
439
+ number_of_kpi_days=number_of_kpi_days,
440
+ availability_threshold=availability_threshold,
441
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
442
+ number_of_threshold_days=number_of_threshold_days,
443
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
444
+ tch_blocking_threshold=tch_blocking_threshold,
445
+ )
446
+ daily_df_for_capacity = df.copy()
447
+ daily_df_for_capacity = daily_df_for_capacity[DAILY_COLUMNS_FOR_CAPACITY]
448
+ daily_df_for_capacity = daily_df_for_capacity.reset_index()
449
+
450
+ if isinstance(daily_df_for_capacity.columns, pd.MultiIndex):
451
+ daily_df_for_capacity.columns = [
452
+ "_".join([str(el) for el in col if el])
453
+ for col in daily_df_for_capacity.columns.values
454
+ ]
455
+ # Rename "BTS_name" to "name"
456
+ daily_df_for_capacity = daily_df_for_capacity.rename(columns={"BTS_name": "name"})
457
+
458
+ return daily_df_for_capacity, df
459
+
460
+
461
+ def get_gsm_databases(dump_path: str) -> pd.DataFrame:
462
+
463
+ dfs = combined_gsm_database(dump_path)
464
+ bts_df: pd.DataFrame = dfs[0]
465
+ trx_df: pd.DataFrame = dfs[2]
466
+
467
+ # Clean GSM df
468
+ bts_df = bts_df[GSM_COLUMNS]
469
+ trx_df = trx_df[TRX_COLUMNS]
470
+
471
+ # Remove duplicate in TRX df
472
+ trx_df = trx_df.drop_duplicates(subset=["ID_BTS"])
473
+
474
+ gsm_df = pd.merge(bts_df, trx_df, on="ID_BTS", how="left")
475
+
476
+ # add hf_rate_coef
477
+ gsm_df["hf_rate_coef"] = gsm_df["amrSegLoadDepTchRateLower"].map(
478
+ GsmAnalysis.hf_rate_coef
479
+ )
480
+ # Add "GPRS" colomn equal to (dedicatedGPRScapacity * number_tch_per_cell)/100
481
+ gsm_df["GPRS"] = (
482
+ gsm_df["dedicatedGPRScapacity"] * gsm_df["number_tch_per_cell"]
483
+ ) / 100
484
+
485
+ # "TCH Actual HR%" equal to "number of TCH" multiplyed by "Coef HF rate"
486
+ gsm_df["TCH Actual HR%"] = gsm_df["number_tch_per_cell"] * gsm_df["hf_rate_coef"]
487
+
488
+ # Remove empty rows
489
+ gsm_df = gsm_df.dropna(subset=["TCH Actual HR%"])
490
+
491
+ # Get "Offered Traffic BH" by mapping approximate "TCH Actual HR%" to 2G analysis_utility "erlangB" dict
492
+ gsm_df["Offered Traffic BH"] = gsm_df["TCH Actual HR%"].apply(
493
+ lambda x: GsmAnalysis.erlangB_table.get(int(x), 0)
494
+ )
495
+
496
+ return gsm_df
497
+
498
+
499
+ def get_operational_neighbours(distance: int) -> pd.DataFrame:
500
+
501
+ operational_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
502
+ operational_df = operational_df[
503
+ ["ID_BTS", "name", "operational_comment", "Longitude", "Latitude"]
504
+ ]
505
+ # keep row only if column "operational_comment" is not "Operational is OK"
506
+ operational_df = operational_df[
507
+ operational_df["operational_comment"] != "Operational is OK"
508
+ ]
509
+ operational_df = operational_df[
510
+ operational_df[["Latitude", "Longitude"]].notna().all(axis=1)
511
+ ]
512
+
513
+ # Rename all columns in operational_df by adding "Dataset2_" prefix
514
+ operational_df = operational_df.add_prefix("Dataset2_")
515
+
516
+ congested_df: pd.DataFrame = GsmCapacity.operational_neighbours_df
517
+ congested_df = congested_df[
518
+ ["ID_BTS", "name", "BH Congestion status", "Longitude", "Latitude"]
519
+ ]
520
+
521
+ # Remove rows where "BH Congestion status" is empty or NaN
522
+ congested_df = congested_df[
523
+ congested_df["BH Congestion status"].notna()
524
+ & congested_df["BH Congestion status"].astype(str).str.len().astype(bool)
525
+ ]
526
+ # Remove rows where "BH Congestion status" is "nan, nan"
527
+ congested_df = congested_df[congested_df["BH Congestion status"] != "nan, nan"]
528
+
529
+ # Remove rows where Latitude and Longitude are empty
530
+ congested_df = congested_df[
531
+ congested_df[["Latitude", "Longitude"]].notna().all(axis=1)
532
+ ]
533
+
534
+ # Rename all columns in congested_df by adding "Dataset1_" prefix
535
+ congested_df = congested_df.add_prefix("Dataset1_")
536
+
537
+ distances_dfs = calculate_distances(
538
+ congested_df,
539
+ operational_df,
540
+ "Dataset1_ID_BTS",
541
+ "Dataset1_Latitude",
542
+ "Dataset1_Longitude",
543
+ "Dataset2_ID_BTS",
544
+ "Dataset2_Latitude",
545
+ "Dataset2_Longitude",
546
+ )
547
+ distances_df = distances_dfs[0]
548
+ df1 = distances_df[distances_df["Distance_km"] <= distance]
549
+
550
+ # Rename all columns in df1
551
+ df1 = df1.rename(
552
+ columns={
553
+ "Dataset1_ID_BTS": "Source_ID_BTS",
554
+ "Dataset1_name": "Source_name",
555
+ "Dataset1_BH Congestion status": "Source_BH Congestion status",
556
+ "Dataset1_Longitude": "Source_Longitude",
557
+ "Dataset1_Latitude": "Source_Latitude",
558
+ "Dataset2_ID_BTS_Dataset2": "Neighbour_ID_BTS",
559
+ "Dataset2_name_Dataset2": "Neighbour_name",
560
+ "Dataset2_operational_comment_Dataset2": "Neighbour_operational_comment",
561
+ "Dataset2_Longitude_Dataset2": "Neighbour_Longitude",
562
+ "Dataset2_Latitude_Dataset2": "Neighbour_Latitude",
563
+ }
564
+ )
565
+
566
+ # Remove rows if Source_name = Neighbour_name
567
+ df1 = df1[df1["Source_name"] != df1["Neighbour_name"]]
568
+
569
+ # Reset index
570
+ df1 = df1.reset_index(drop=True)
571
+ return df1
572
+
573
+
574
+ def analyze_gsm_data(
575
+ dump_path: str,
576
+ daily_report_path: str,
577
+ bh_report_path: str,
578
+ number_of_kpi_days: int,
579
+ number_of_threshold_days: int,
580
+ availability_threshold: int,
581
+ tch_abis_fails_threshold: int,
582
+ sdcch_blocking_threshold: float,
583
+ tch_blocking_threshold: float,
584
+ max_traffic_threshold: int,
585
+ operational_neighbours_distance: int,
586
+ ):
587
+ GsmCapacity.operational_neighbours_df = None
588
+
589
+ daily_kpi_dfs = analyse_daily_data(
590
+ daily_report_path=daily_report_path,
591
+ number_of_kpi_days=number_of_kpi_days,
592
+ availability_threshold=availability_threshold,
593
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
594
+ number_of_threshold_days=number_of_threshold_days,
595
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
596
+ tch_blocking_threshold=tch_blocking_threshold,
597
+ )
598
+
599
+ gsm_database_df: pd.DataFrame = get_gsm_databases(dump_path)
600
+
601
+ bh_kpi_dfs = analyse_bh_data(
602
+ bh_report_path=bh_report_path,
603
+ number_of_kpi_days=number_of_kpi_days,
604
+ tch_blocking_threshold=tch_blocking_threshold,
605
+ sdcch_blocking_threshold=sdcch_blocking_threshold,
606
+ number_of_threshold_days=number_of_threshold_days,
607
+ tch_abis_fails_threshold=tch_abis_fails_threshold,
608
+ availability_threshold=availability_threshold,
609
+ )
610
+
611
+ bh_kpi_df = bh_kpi_dfs[0]
612
+ bh_kpi_full_df = bh_kpi_dfs[1]
613
+
614
+ daily_kpi_df = daily_kpi_dfs[0]
615
+ daily_kpi_full_df = daily_kpi_dfs[1]
616
+
617
+ gsm_analysis_df = gsm_database_df.merge(bh_kpi_df, on="name", how="left")
618
+ gsm_analysis_df = gsm_analysis_df.merge(daily_kpi_df, on="name", how="left")
619
+
620
+ # "TCH UTILIZATION (@Max Traffic)" equal to "(Max_Trafic" divided by "Offered Traffic BH)*100"
621
+ gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] = (
622
+ gsm_analysis_df["Max_Traffic BH"] / gsm_analysis_df["Offered Traffic BH"]
623
+ ) * 100
624
+
625
+ # Add column "Tch utilization comments" : if "TCH UTILIZATION (@Max Traffic)" exceeded it's threshold then "Tch utilization exceeded threshold else None
626
+ gsm_analysis_df["Tch utilization comments"] = np.where(
627
+ gsm_analysis_df["TCH UTILIZATION (@Max Traffic)"] > max_traffic_threshold,
628
+ "Tch utilization exceeded threshold",
629
+ None,
630
+ )
631
+ # Add "BH Congestion status" : concatenate "Tch utilization comments" + "tch_call_blocking_bh_comment" + "sdcch_real_blocking_bh_comment"
632
+ gsm_analysis_df = combine_comments(
633
+ gsm_analysis_df,
634
+ "Tch utilization comments",
635
+ "tch_call_blocking_bh_comment",
636
+ "sdcch_real_blocking_bh_comment",
637
+ new_column="BH Congestion status",
638
+ )
639
+
640
+ # Add "ERLANGB value" =MAX TRAFFIC/(1-(MAX TCH call blocking/200))
641
+ gsm_analysis_df["ErlabngB_value"] = gsm_analysis_df["Max_Traffic BH"] / (
642
+ 1 - (gsm_analysis_df["max_tch_call_blocking_bh"] / 200)
643
+ )
644
+
645
+ # - Get "Target FR CHs" by mapping "ERLANG value" to 2G analysis_utility "erlangB" dict
646
+ gsm_analysis_df["Target FR CHs"] = gsm_analysis_df["ErlabngB_value"].apply(
647
+ lambda x: GsmAnalysis.erlangB_table.get(int(x) if pd.notnull(x) else 0, 0)
648
+ )
649
+
650
+ # "Target HR CHs" equal to "Target FR CHs" * 2
651
+ gsm_analysis_df["Target HR CHs"] = gsm_analysis_df["Target FR CHs"] * 2
652
+
653
+ # - Target TCHs equal to Target HR CHs + Signal + GPRS + SDCCH
654
+ gsm_analysis_df["Target TCHs"] = (
655
+ gsm_analysis_df["Target HR CHs"]
656
+ + gsm_analysis_df["number_signals_per_cell"]
657
+ + gsm_analysis_df["GPRS"]
658
+ + gsm_analysis_df["number_sd_per_cell"]
659
+ )
660
+ # "Target TRXs" equal to roundup(Target TCHs/8)
661
+ gsm_analysis_df["Target TRXs"] = np.ceil(
662
+ gsm_analysis_df["Target TCHs"] / 8
663
+ )
664
+
665
+ # "Number of required TRXs" equal to difference between "Target TRXs" and "number_trx_per_cell"
666
+ gsm_analysis_df["Number of required TRXs"] = (
667
+ gsm_analysis_df["Target TRXs"] - gsm_analysis_df["number_trx_per_cell"]
668
+ )
669
+
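A worked pass through the dimensioning chain above (all figures invented; the FR-channel lookup depends on GsmAnalysis.erlangB_table):

max_traffic_bh = 14.0                    # Erl
max_tch_blocking = 4.0                   # %
erlangb_value = max_traffic_bh / (1 - max_tch_blocking / 200)  # ~14.29
target_fr = 21                           # erlangB_table.get(int(14)), say
target_hr = target_fr * 2                # 42 half-rate channels
target_tchs = target_hr + 2 + 3 + 2      # + signalling, GPRS, SDCCH = 49
target_trxs = -(-target_tchs // 8)       # ceil(49 / 8) = 7
required = target_trxs - 4               # vs 4 existing TRXs -> 3 more needed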
670
+ # if "availability_comment_daily" equal to "Down Site" then "Down Site"
671
+ # if "availability_comment_daily" is not "Availability OK" and "tch_abis_fail_daily_comment" equal to "tch abis fail exceeded threshold" then "Availability and TX issues"
672
+ # if "availability_comment_daily" is not "Availability OK" and "tch_abis_fail_daily_comment" is empty then "Availability issues"
673
+ # if "availability_comment_daily" is "Availability OK" and "tch_abis_fail_daily_comment" equal to "tch abis fail exceeded threshold" then "TX issues"
674
+ # Else "Operational is OK"
675
+ gsm_analysis_df["operational_comment"] = np.select(
676
+ [
677
+ gsm_analysis_df["availability_comment_daily"] == "Down Site", # 1
678
+ (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
679
+ & (
680
+ gsm_analysis_df["tch_abis_fail_daily_comment"]
681
+ == "tch abis fail exceeded threshold"
682
+ ), # 2
683
+ (gsm_analysis_df["availability_comment_daily"] != "Availability OK")
684
+ & pd.isna(gsm_analysis_df["tch_abis_fail_daily_comment"]), # 3
685
+ (gsm_analysis_df["availability_comment_daily"] == "Availability OK")
686
+ & (
687
+ gsm_analysis_df["tch_abis_fail_daily_comment"]
688
+ == "tch abis fail exceeded threshold"
689
+ ), # 4
690
+ ],
691
+ [
692
+ "Down Site", # 1
693
+ "Availability and TX issues", # 2
694
+ "Availability issues", # 3
695
+ "TX issues", # 4
696
+ ],
697
+ default="Operational is OK",
698
+ )
699
+
700
+ # Add "Final comment" with "BH Congestion status" + "operational_comment"
701
+ gsm_analysis_df = combine_comments(
702
+ gsm_analysis_df,
703
+ "BH Congestion status",
704
+ "operational_comment",
705
+ new_column="Final comment",
706
+ )
707
+ # Map the final comment using final_comment_mapping
708
+ gsm_analysis_df["Final comment summary"] = gsm_analysis_df["Final comment"].map(
709
+ GsmCapacity.final_comment_mapping
710
+ )
711
+ gsm_analysis_df = gsm_analysis_df[GSM_ANALYSIS_COLUMNS]
712
+
713
+ GsmCapacity.operational_neighbours_df = gsm_analysis_df[
714
+ OPERATIONAL_NEIGHBOURS_COLUMNS
715
+ ]
716
+ distance_df = get_operational_neighbours(operational_neighbours_distance)
717
+
718
+ return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df, distance_df]
719
+ # return [gsm_analysis_df, bh_kpi_full_df, daily_kpi_full_df]
process_kpi/process_lcg_capacity.py ADDED
@@ -0,0 +1,286 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from utils.kpi_analysis_utils import (
5
+ analyze_lcg_utilization,
6
+ combine_comments,
7
+ create_daily_date,
8
+ create_dfs_per_kpi,
9
+ kpi_naming_cleaning,
10
+ )
11
+ from utils.utils_vars import get_physical_db
12
+
13
+ lcg_comments_mapping = {
14
+ "2": "No Congestion",
15
+ "1": "No Congestion",
16
+ "lcg1 exceeded threshold, lcg2 exceeded threshold, 2": "Need BB SU upgrage",
17
+ "lcg1 exceeded threshold, 2": "Need LCG balancing",
18
+ "lcg1 exceeded threshold, 1": "Need BB SU upgrage",
19
+ "lcg2 exceeded threshold, 2": "Need LCG balancing",
20
+ }
21
+
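For orientation: the keys above are the strings combine_comments produces, "<lcg_comment>, <number_of_lcg>", so for example:

print(lcg_comments_mapping["2"])                           # No Congestion (2 LCGs, no breach)
print(lcg_comments_mapping["lcg1 exceeded threshold, 2"])  # Need LCG balancing
print(lcg_comments_mapping["lcg1 exceeded threshold, 1"])  # Need BB SU upgrade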
22
+
23
+ KPI_COLUMNS = [
24
+ "date",
25
+ "WBTS_name",
26
+ "lcg_id",
27
+ "BB_SU_LCG_MAX_R",
28
+ ]
29
+
30
+ LCG_ANALYSIS_COLUMNS = [
31
+ "WBTS_name",
32
+ "lcg1_utilisation",
33
+ "avg_lcg1",
34
+ "max_lcg1",
35
+ "number_of_days_with_lcg1_exceeded",
36
+ "lcg1_comment",
37
+ "lcg2_utilisation",
38
+ "avg_lcg2",
39
+ "max_lcg2",
40
+ "number_of_days_with_lcg2_exceeded",
41
+ "lcg2_comment",
42
+ "difference_between_lcgs",
43
+ "difference_between_lcgs_comment",
44
+ "lcg_comment",
45
+ "number_of_lcg",
46
+ "final_comments",
47
+ ]
48
+
49
+
50
+ def lcg_kpi_analysis(
51
+ df,
52
+ num_last_days,
53
+ num_threshold_days,
54
+ lcg_utilization_threshold,
55
+ difference_between_lcgs,
56
+ ) -> list[pd.DataFrame]:
57
+ """
58
+ Analyze LCG capacity data.
59
+
60
+ Args:
61
+ df: DataFrame containing LCG capacity data
62
+ num_last_days: Number of days for analysis
63
+ num_threshold_days: Minimum days above threshold to flag for upgrade
64
+ lcg_utilization_threshold: Utilization threshold percentage for flagging
65
+ difference_between_lcgs: Difference between LCGs for flagging
66
+
67
+ Returns:
68
+ Processed DataFrame with LCG capacity analysis results
69
+ """
70
+
71
+ lcg1_df = df[df["lcg_id"] == 1]
72
+ lcg2_df = df[df["lcg_id"] == 2]
73
+
74
+ pivoted_kpi_dfs = create_dfs_per_kpi(
75
+ df=df,
76
+ pivot_date_column="date",
77
+ pivot_name_column="WBTS_name",
78
+ kpi_columns_from=2,
79
+ )
80
+
81
+ pivoted_lcg1_df = create_dfs_per_kpi(
82
+ df=lcg1_df,
83
+ pivot_date_column="date",
84
+ pivot_name_column="WBTS_name",
85
+ kpi_columns_from=2,
86
+ )
87
+ pivoted_lcg2_df = create_dfs_per_kpi(
88
+ df=lcg2_df,
89
+ pivot_date_column="date",
90
+ pivot_name_column="WBTS_name",
91
+ kpi_columns_from=2,
92
+ )
93
+
94
+ # BB_SU_LCG_MAX_R keeps all sites with LCG 1 and/or LCG 2
95
+ BB_SU_LCG_MAX_R_df = pivoted_kpi_dfs["BB_SU_LCG_MAX_R"]
96
+
97
+ pivoted_lcg1_df = pivoted_lcg1_df["BB_SU_LCG_MAX_R"]
98
+ pivoted_lcg2_df = pivoted_lcg2_df["BB_SU_LCG_MAX_R"]
99
+
100
+ # rename column
101
+ pivoted_lcg1_df = pivoted_lcg1_df.rename(
102
+ columns={"BB_SU_LCG_MAX_R": "lcg1_utilisation"}
103
+ )
104
+ pivoted_lcg2_df = pivoted_lcg2_df.rename(
105
+ columns={"BB_SU_LCG_MAX_R": "lcg2_utilisation"}
106
+ )
107
+
108
+ # analyze lcg utilization for each site per number_of_kpi_days and number_of_threshold_days
109
+ pivoted_lcg1_df = analyze_lcg_utilization(
110
+ df=pivoted_lcg1_df,
111
+ number_of_kpi_days=num_last_days,
112
+ number_of_threshold_days=num_threshold_days,
113
+ kpi_threshold=lcg_utilization_threshold,
114
+ kpi_column_name="lcg1",
115
+ )
116
+ pivoted_lcg2_df = analyze_lcg_utilization(
117
+ df=pivoted_lcg2_df,
118
+ number_of_kpi_days=num_last_days,
119
+ number_of_threshold_days=num_threshold_days,
120
+ kpi_threshold=lcg_utilization_threshold,
121
+ kpi_column_name="lcg2",
122
+ )
123
+ kpi_df = pd.concat(
124
+ [
125
+ BB_SU_LCG_MAX_R_df,
126
+ pivoted_lcg1_df,
127
+ pivoted_lcg2_df,
128
+ ],
129
+ axis=1,
130
+ )
131
+
132
+ kpi_df = kpi_df.reset_index()
133
+
134
+ # Number of available lcgs
135
+ # kpi_df = pd.merge(kpi_df, available_lcgs_df, on="WBTS_name", how="left")
136
+
137
+ # calculate difference between lcg1 and lcg2
138
+ kpi_df["difference_between_lcgs"] = kpi_df[["avg_lcg1", "avg_lcg2"]].apply(
139
+ lambda row: max(row) - min(row), axis=1
140
+ )
141
+
142
+ # flag if difference between lcg1 and lcg2 is above threshold
143
+ kpi_df["difference_between_lcgs_comment"] = np.where(
144
+ kpi_df["difference_between_lcgs"] > difference_between_lcgs,
145
+ "difference between lcgs exceeded threshold",
146
+ None,
147
+ )
148
+
149
+ # Combine comments
150
+ kpi_df = combine_comments(
151
+ kpi_df,
152
+ "lcg1_comment",
153
+ "lcg2_comment",
154
+ # "difference_between_lcgs_comment",
155
+ new_column="lcg_comment",
156
+ )
157
+
158
+ # Replace if "lcg_comment" contains "nan" and ", nan" and "nan, " with None
159
+ kpi_df["lcg_comment"] = kpi_df["lcg_comment"].replace("nan", None)
160
+
161
+ # Remove "nan" from comma-separated strings
162
+ kpi_df["lcg_comment"] = (
163
+ kpi_df["lcg_comment"].str.replace(r"\bnan\b,?\s?", "", regex=True).str.strip()
164
+ )
165
+
166
+ kpi_df["number_of_lcg"] = np.where(
167
+ kpi_df["avg_lcg1"].notna() & kpi_df["avg_lcg2"].notna(),
168
+ 2,
169
+ np.where(kpi_df["avg_lcg1"].notna() | kpi_df["avg_lcg2"].notna(), 1, 0),
170
+ )
171
+ # Combine comments
172
+ kpi_df = combine_comments(
173
+ kpi_df,
174
+ "lcg_comment",
175
+ "number_of_lcg",
176
+ new_column="final_comments",
177
+ )
178
+ kpi_df["final_comments"] = kpi_df["final_comments"].apply(
179
+ lambda x: lcg_comments_mapping.get(x, x)
180
+ )
181
+ kpi_df = kpi_df[LCG_ANALYSIS_COLUMNS]
182
+
183
+ lcg_analysis_df = kpi_df.copy()
184
+
185
+ lcg_analysis_df = lcg_analysis_df[
186
+ [
187
+ "WBTS_name",
188
+ "avg_lcg1",
189
+ "max_lcg1",
190
+ "number_of_days_with_lcg1_exceeded",
191
+ "lcg1_comment",
192
+ "avg_lcg2",
193
+ "max_lcg2",
194
+ "number_of_days_with_lcg2_exceeded",
195
+ "lcg2_comment",
196
+ "difference_between_lcgs",
197
+ "final_comments",
198
+ ]
199
+ ]
200
+
201
+ lcg_analysis_df = lcg_analysis_df.droplevel(level=1, axis=1)
202
+ # Remove rows where WBTS_name is shorter than 5 characters
203
+ lcg_analysis_df = lcg_analysis_df[lcg_analysis_df["WBTS_name"].str.len() >= 5]
204
+
205
+ # Add code
206
+ lcg_analysis_df["code"] = lcg_analysis_df["WBTS_name"].str.split("_").str[0]
207
+
208
+ lcg_analysis_df["code"] = (
209
+ pd.to_numeric(lcg_analysis_df["code"], errors="coerce").fillna(0).astype(int)
210
+ )
211
+
212
+ lcg_analysis_df["Region"] = (
213
+ lcg_analysis_df["WBTS_name"].str.split("_").str[1:2].str.join("_")
214
+ )
215
+ lcg_analysis_df["Region"] = lcg_analysis_df["Region"].fillna("UNKNOWN")
216
+
217
+ # move code to the first column
218
+ lcg_analysis_df = lcg_analysis_df[
219
+ ["code", "Region"]
220
+ + [col for col in lcg_analysis_df if col != "code" and col != "Region"]
221
+ ]
222
+
223
+ # Load physical database
224
+ physical_db: pd.DataFrame = get_physical_db()
225
+
226
+ # Convert code_sector to code
227
+ physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
228
+ # remove duplicates
229
+ physical_db = physical_db.drop_duplicates(subset="code")
230
+
231
+ # keep only code and longitude and latitude
232
+ physical_db = physical_db[["code", "Longitude", "Latitude"]]
233
+
234
+ physical_db["code"] = (
235
+ pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
236
+ )
237
+
238
+ lcg_analysis_df = pd.merge(
239
+ lcg_analysis_df,
240
+ physical_db,
241
+ on="code",
242
+ how="left",
243
+ )
244
+
245
+ return [lcg_analysis_df, kpi_df]
246
+
247
+
248
+ def load_and_process_lcg_data(
249
+ uploaded_file,
250
+ num_last_days,
251
+ num_threshold_days,
252
+ lcg_utilization_threshold,
253
+ difference_between_lcgs,
254
+ ) -> list[pd.DataFrame]:
255
+ """Load and process data for LCG capacity analysis."""
256
+ try:
257
+ # Load data
258
+ df = pd.read_csv(uploaded_file, delimiter=";")
259
+ if df.empty:
260
+ raise ValueError("Uploaded file is empty")
261
+
262
+ df = kpi_naming_cleaning(df)
263
+ df = create_daily_date(df)
264
+
265
+ # Validate required columns
266
+ missing_cols = [col for col in KPI_COLUMNS if col not in df.columns]
267
+ if missing_cols:
268
+ raise ValueError(f"Missing required columns: {', '.join(missing_cols)}")
269
+
270
+ df = df[KPI_COLUMNS]
271
+
272
+ # Process the data
273
+ dfs = lcg_kpi_analysis(
274
+ df,
275
+ num_last_days,
276
+ num_threshold_days,
277
+ lcg_utilization_threshold,
278
+ difference_between_lcgs,
279
+ )
280
+ return dfs
281
+
282
+ except Exception as e:
283
+ # Log the error and re-raise with a user-friendly message
284
+ error_msg = f"Error processing LCG data: {str(e)}"
285
+ print(error_msg)  # st was undefined here; streamlit is not imported in this Panel app
286
+ raise
process_kpi/process_lte_capacity.py ADDED
@@ -0,0 +1,528 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from queries.process_lte import process_lte_data
5
+ from utils.convert_to_excel import save_dataframe
6
+ from utils.kpi_analysis_utils import (
7
+ LteCapacity,
8
+ analyze_prb_usage,
9
+ cell_availability_analysis,
10
+ create_dfs_per_kpi,
11
+ create_hourly_date,
12
+ kpi_naming_cleaning,
13
+ )
14
+
15
+ LTE_ANALYSIS_COLUMNS = [
16
+ "code",
17
+ "code_sector",
18
+ "Region",
19
+ "site_config_band",
20
+ "Longitude",
21
+ "Latitude",
22
+ "LNCEL_name_l800",
23
+ "LNCEL_name_l1800",
24
+ "LNCEL_name_l2300",
25
+ "LNCEL_name_l2600",
26
+ "LNCEL_name_l1800s",
27
+ "avg_prb_usage_bh_l800",
28
+ "avg_prb_usage_bh_l1800",
29
+ "avg_prb_usage_bh_l2300",
30
+ "avg_prb_usage_bh_l2600",
31
+ "avg_prb_usage_bh_l1800s",
32
+ "avg_prb_usage_bh_l800_2nd",
33
+ "avg_prb_usage_bh_l1800_2nd",
34
+ "avg_prb_usage_bh_l2300_2nd",
35
+ "avg_prb_usage_bh_l2600_2nd",
36
+ "avg_prb_usage_bh_l1800s_2nd",
37
+ "avg_act_ues_l800",
38
+ "avg_act_ues_l1800",
39
+ "avg_act_ues_l2300",
40
+ "avg_act_ues_l2600",
41
+ "avg_act_ues_l1800s",
42
+ "avg_dl_thp_l800",
43
+ "avg_dl_thp_l1800",
44
+ "avg_dl_thp_l2300",
45
+ "avg_dl_thp_l2600",
46
+ "avg_dl_thp_l1800s",
47
+ "avg_ul_thp_l800",
48
+ "avg_ul_thp_l1800",
49
+ "avg_ul_thp_l2300",
50
+ "avg_ul_thp_l2600",
51
+ "avg_ul_thp_l1800s",
52
+ "num_congested_cells",
53
+ "num_cells",
54
+ "num_cell_with_kpi",
55
+ "num_down_or_no_kpi_cells",
56
+ "prb_diff_between_cells",
57
+ "load_balance_required",
58
+ "congestion_comment",
59
+ "final_comments",
60
+ ]
61
+
62
+ LTE_DATABASE_COLUMNS = [
63
+ "code",
64
+ "Region",
65
+ "site_config_band",
66
+ "final_name",
67
+ "Longitude",
68
+ "Latitude",
69
+ ]
70
+
71
+ KPI_COLUMNS = [
72
+ "date",
73
+ "LNCEL_name",
74
+ "Cell_Avail_excl_BLU",
75
+ "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
76
+ "DL_PRB_Util_p_TTI_Lev_10",
77
+ "Avg_PDCP_cell_thp_UL",
78
+ "Avg_PDCP_cell_thp_DL",
79
+ "Avg_act_UEs_DL",
80
+ ]
81
+ PRB_COLUMNS = [
82
+ "LNCEL_name",
83
+ "avg_prb_usage_bh",
84
+ "avg_prb_usage_bh_2nd",
85
+ "avg_act_ues",
86
+ "avg_dl_thp",
87
+ "avg_ul_thp",
88
+ ]
89
+
90
+
91
+ def lte_analysis_logic(
92
+ df: pd.DataFrame,
93
+ prb_usage_threshold: int,
94
+ prb_diff_between_cells_threshold: int,
95
+ ) -> pd.DataFrame:
96
+ lte_analysis_logic_df = df.copy()
97
+ lte_analysis_logic_df["num_congested_cells"] = (
98
+ lte_analysis_logic_df[
99
+ [
100
+ "avg_prb_usage_bh_l800",
101
+ "avg_prb_usage_bh_l1800",
102
+ "avg_prb_usage_bh_l2300",
103
+ "avg_prb_usage_bh_l2600",
104
+ "avg_prb_usage_bh_l1800s",
105
+ ]
106
+ ]
107
+ >= prb_usage_threshold
108
+ ).sum(axis=1)
109
+
110
+ # Count defined cells across LNCEL_name_l800/l1800/l2300/l2600/l1800s
111
+ lte_analysis_logic_df["num_cells"] = lte_analysis_logic_df[
112
+ [
113
+ "LNCEL_name_l800",
114
+ "LNCEL_name_l1800",
115
+ "LNCEL_name_l2300",
116
+ "LNCEL_name_l2600",
117
+ "LNCEL_name_l1800s",
118
+ ]
119
+ ].count(axis=1)
120
+
121
+ # Add Number of cell with KPI
122
+ lte_analysis_logic_df["num_cell_with_kpi"] = lte_analysis_logic_df[
123
+ [
124
+ "avg_prb_usage_bh_l800",
125
+ "avg_prb_usage_bh_l1800",
126
+ "avg_prb_usage_bh_l2300",
127
+ "avg_prb_usage_bh_l2600",
128
+ "avg_prb_usage_bh_l1800s",
129
+ ]
130
+ ].count(axis=1)
131
+
132
+ # Number of Down or No KPI cells = num_cells -num_cell_with_kpi
133
+ lte_analysis_logic_df["num_down_or_no_kpi_cells"] = (
134
+ lte_analysis_logic_df["num_cells"] - lte_analysis_logic_df["num_cell_with_kpi"]
135
+ )
136
+
137
+ # Max difference between avg_prb_usage_bh across the five bands
138
+ lte_analysis_logic_df["prb_diff_between_cells"] = lte_analysis_logic_df[
139
+ [
140
+ "avg_prb_usage_bh_l800",
141
+ "avg_prb_usage_bh_l1800",
142
+ "avg_prb_usage_bh_l2300",
143
+ "avg_prb_usage_bh_l2600",
144
+ "avg_prb_usage_bh_l1800s",
145
+ ]
146
+ ].apply(lambda row: max(row) - min(row), axis=1)
147
+
148
+ # Add Load balance required column = Yes if prb_diff_between_cells > prb_diff_between_cells_threshold else No
149
+ lte_analysis_logic_df["load_balance_required"] = lte_analysis_logic_df[
150
+ "prb_diff_between_cells"
151
+ ].apply(lambda x: "Yes" if x > prb_diff_between_cells_threshold else "No")
152
+
153
+ # Add Next band column
154
+ lte_analysis_logic_df["next_band"] = lte_analysis_logic_df["site_config_band"].map(
155
+ LteCapacity.next_band_mapping
156
+ )
157
+
158
+ # Add congestion comments
159
+ # if num_congested_cells == 0 and num_down_or_no_kpi_cells == 0 -> "No Congestion"
160
+ # if num_congested_cells == 0 and num_down_or_no_kpi_cells > 0 -> "No congestion but Down cell"
161
+ # if num_congested_cells > 0 and num_down_or_no_kpi_cells > 0 -> "Congestion but Colocated Down Cell"
162
+ # Else "Need Action"
163
+ conditions = [
164
+ (lte_analysis_logic_df["num_congested_cells"] == 0)
165
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] == 0),
166
+ (lte_analysis_logic_df["num_congested_cells"] == 0)
167
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
168
+ (lte_analysis_logic_df["num_congested_cells"] > 0)
169
+ & (lte_analysis_logic_df["num_down_or_no_kpi_cells"] > 0),
170
+ ]
171
+
172
+ choices = [
173
+ "No Congestion",
174
+ "No congestion but Down cell",
175
+ "Congestion but Colocated Down Cell",
176
+ ]
177
+
178
+ lte_analysis_logic_df["congestion_comment"] = np.select(
179
+ conditions, choices, default="Need Action"
180
+ )
181
+
182
+ # Add "Actions" column
183
+ # if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Load Balancing parameter tuning required"
184
+ # if load_balance_required = "Yes" and congestion_comment = "Need Action" then "Add Layer"
185
+ # Else keep congestion_comment
186
+ conditions = [
187
+ (lte_analysis_logic_df["load_balance_required"] == "Yes")
188
+ & (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
189
+ (lte_analysis_logic_df["load_balance_required"] == "No")
190
+ & (lte_analysis_logic_df["congestion_comment"] == "Need Action"),
191
+ ]
192
+
193
+ choices = [
194
+ "Load Balancing parameter tuning required",
195
+ "Add Layer",
196
+ ]
197
+
198
+ lte_analysis_logic_df["actions"] = np.select(
199
+ conditions, choices, default=lte_analysis_logic_df["congestion_comment"]
200
+ )
201
+
202
+ # Add Final Comments
203
+ # if "actions" = "Add Layer" then "'Add' + 'next_band''
204
+ # Else keep "actions" as it is
205
+ lte_analysis_logic_df["final_comments"] = lte_analysis_logic_df.apply(
206
+ lambda row: (
207
+ f"Add {row['next_band']}"
208
+ if row["actions"] == "Add Layer"
209
+ else row["actions"]
210
+ ),
211
+ axis=1,
212
+ )
213
+
214
+ # create column "sector" equal to conteent of "LNCEL_name_l800" if not empty else "LNCEL_name_l1800" if not empty else "LNCEL_name_l2300"
215
+ lte_analysis_logic_df["sector"] = (
216
+ lte_analysis_logic_df["LNCEL_name_l800"]
217
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l1800"])
218
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l2300"])
219
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l2600"])
220
+ .combine_first(lte_analysis_logic_df["LNCEL_name_l1800s"])
221
+ )
222
+ # remove rows where sector is empty
223
+ lte_analysis_logic_df = lte_analysis_logic_df[
224
+ lte_analysis_logic_df["sector"].notna()
225
+ ]
226
+ # Add sector_id column if sector contains : '_1_" then 1 elif sector contains : '_2_" then 2 elif sector contains : '_3_" then 3
227
+ lte_analysis_logic_df["sector_id"] = np.where(
228
+ lte_analysis_logic_df["sector"].str.contains("_1_"),
229
+ 1,
230
+ np.where(
231
+ lte_analysis_logic_df["sector"].str.contains("_2_"),
232
+ 2,
233
+ np.where(lte_analysis_logic_df["sector"].str.contains("_3_"), 3, np.nan),
234
+ ),
235
+ )
236
+ # add code_sector column by combine code and sector_id
237
+ lte_analysis_logic_df["code_sector"] = (
238
+ lte_analysis_logic_df["code"].astype(str)
239
+ + "_"
240
+ + lte_analysis_logic_df["sector_id"].astype(str)
241
+ )
242
+
243
+ # remove '.0' from code_sector
244
+ lte_analysis_logic_df["code_sector"] = lte_analysis_logic_df[
245
+ "code_sector"
246
+ ].str.replace(".0", "")
247
+
248
+ # lte_analysis_logic_df = lte_analysis_logic_df[LTE_ANALYSIS_COLUMNS]
249
+ return lte_analysis_logic_df
250
+
251
+
252
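The three explicit rules plus the `np.select` default are easier to see on a toy frame. A minimal sketch with invented counts (not values from any real report):

```python
import numpy as np
import pandas as pd

toy = pd.DataFrame(
    {"num_congested_cells": [0, 0, 2, 1], "num_down_or_no_kpi_cells": [0, 1, 1, 0]}
)
conditions = [
    (toy["num_congested_cells"] == 0) & (toy["num_down_or_no_kpi_cells"] == 0),
    (toy["num_congested_cells"] == 0) & (toy["num_down_or_no_kpi_cells"] > 0),
    (toy["num_congested_cells"] > 0) & (toy["num_down_or_no_kpi_cells"] > 0),
]
choices = [
    "No Congestion",
    "No congestion but Down cell",
    "Congestion but Colocated Down Cell",
]
toy["congestion_comment"] = np.select(conditions, choices, default="Need Action")
print(toy["congestion_comment"].tolist())
# ['No Congestion', 'No congestion but Down cell',
#  'Congestion but Colocated Down Cell', 'Need Action']
```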
+ def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
+     # Base DataFrame with unique codes, Region, and site_config_band
+     all_codes_df = df[
+         ["code", "Region", "site_config_band", "Longitude", "Latitude"]
+     ].drop_duplicates()
+
+     # Configuration of sector groups with their LNCEL patterns and column suffixes.
+     # Format: { "group_key": [(lncel_name_pattern, column_suffix), ...] }
+     # Example: for group "1", pattern "_1_L800" maps to suffix "l800".
+     sector_groups_config = {
+         "1": [
+             ("_1_L800", "l800"),
+             ("_1_L1800", "l1800"),
+             ("_1_L2300", "l2300"),
+             ("_1_L2600", "l2600"),
+             ("_1S_L1800", "l1800s"),
+         ],
+         "2": [
+             ("_2_L800", "l800"),
+             ("_2_L1800", "l1800"),
+             ("_2_L2300", "l2300"),
+             ("_2_L2600", "l2600"),
+             ("_2S_L1800", "l1800s"),
+         ],
+         "3": [
+             ("_3_L800", "l800"),
+             ("_3_L1800", "l1800"),
+             ("_3_L2300", "l2300"),
+             ("_3_L2600", "l2600"),
+             ("_3S_L1800", "l1800s"),
+         ],
+     }
+
+     all_processed_sectors_dfs = []
+
+     for sector_group_key, band_configurations in sector_groups_config.items():
+         # Start with the base DataFrame for the current sector group
+         current_sector_group_df = all_codes_df.copy()
+
+         for lncel_name_pattern, column_suffix in band_configurations:
+             # Filter the original DataFrame for the current LNCEL pattern;
+             # LNCEL_name is expected to contain something like "SITENAME<lncel_name_pattern>"
+             filtered_band_df = df[df["LNCEL_name"].str.contains(lncel_name_pattern)]
+
+             # Select the relevant columns and rename them before the merge.
+             # Renaming up front avoids pandas adding _x/_y suffixes that would
+             # then need to be renamed afterwards.
+             df_to_merge = filtered_band_df[
+                 [
+                     "code",
+                     "LNCEL_name",
+                     "avg_prb_usage_bh",
+                     "avg_prb_usage_bh_2nd",
+                     "avg_act_ues",
+                     "avg_dl_thp",
+                     "avg_ul_thp",
+                 ]
+             ].rename(
+                 columns={
+                     "LNCEL_name": f"LNCEL_name_{column_suffix}",
+                     "avg_prb_usage_bh": f"avg_prb_usage_bh_{column_suffix}",
+                     "avg_prb_usage_bh_2nd": f"avg_prb_usage_bh_{column_suffix}_2nd",
+                     "avg_act_ues": f"avg_act_ues_{column_suffix}",
+                     "avg_dl_thp": f"avg_dl_thp_{column_suffix}",
+                     "avg_ul_thp": f"avg_ul_thp_{column_suffix}",
+                 }
+             )
+
+             # Perform a left merge
+             current_sector_group_df = pd.merge(
+                 current_sector_group_df, df_to_merge, on="code", how="left"
+             )
+
+         all_processed_sectors_dfs.append(current_sector_group_df)
+
+     # Concatenate all the processed sector DataFrames
+     all_sectors_dfs = pd.concat(all_processed_sectors_dfs, axis=0, ignore_index=True)
+     # save_dataframe(all_sectors_dfs, "all_sectors_dfs.csv")
+
+     return all_sectors_dfs
+
+
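To sanity-check the wide per-sector layout this function produces, here is a minimal sketch. The site code, the `101_ML_1_L800`-style cell names, and all KPI values are invented for illustration only:

```python
import pandas as pd

cells = pd.DataFrame(
    {
        "code": [101, 101],
        "Region": ["R1", "R1"],
        "site_config_band": ["L800/L1800", "L800/L1800"],
        "Longitude": [1.0, 1.0],
        "Latitude": [2.0, 2.0],
        "LNCEL_name": ["101_ML_1_L800", "101_ML_1_L1800"],
        "avg_prb_usage_bh": [85.0, 40.0],
        "avg_prb_usage_bh_2nd": [80.0, 35.0],
        "avg_act_ues": [12.0, 5.0],
        "avg_dl_thp": [8.0, 20.0],
        "avg_ul_thp": [2.0, 5.0],
    }
)

wide = dfs_per_band_cell(cells)
# Row 0 (sector group "1") carries LNCEL_name_l800 / LNCEL_name_l1800 plus
# the renamed KPI columns side by side; groups "2" and "3" are all-NaN rows.
print(wide.filter(like="avg_prb_usage_bh_l").iloc[0].to_dict())
```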
+ def lte_database_for_capacity(dump_path: str):
+     dfs = process_lte_data(dump_path)
+     lte_fdd = dfs[0]
+     lte_tdd = dfs[1]
+
+     lte_fdd = lte_fdd[LTE_DATABASE_COLUMNS]
+     lte_tdd = lte_tdd[LTE_DATABASE_COLUMNS]
+
+     lte_db = pd.concat([lte_fdd, lte_tdd], axis=0)
+
+     # Rename final_name to LNCEL_name
+     lte_db = lte_db.rename(columns={"final_name": "LNCEL_name"})
+
+     # save_dataframe(lte_db, "LTE_Database.csv")
+     return lte_db
+
+
+ def lte_bh_dfs_per_kpi(
+     dump_path: str,
+     df: pd.DataFrame,
+     number_of_kpi_days: int = 7,
+     availability_threshold: int = 95,
+     prb_usage_threshold: int = 80,
+     prb_diff_between_cells_threshold: int = 20,
+     number_of_threshold_days: int = 3,
+     main_prb_to_use: str = "",
+ ) -> list:
+
+     # print(df.columns)
+
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df,
+         pivot_date_column="date",
+         pivot_name_column="LNCEL_name",
+         kpi_columns_from=2,
+     )
+     cell_availability_df = cell_availability_analysis(
+         df=pivoted_kpi_dfs["Cell_Avail_excl_BLU"],
+         days=number_of_kpi_days,
+         availability_threshold=availability_threshold,
+     )
+     prb_usage_df = analyze_prb_usage(
+         df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
+         number_of_kpi_days=number_of_kpi_days,
+         prb_usage_threshold=prb_usage_threshold,
+         analysis_type="BH",
+         number_of_threshold_days=number_of_threshold_days,
+         suffix="" if main_prb_to_use == "E-UTRAN Avg PRB usage per TTI DL" else "_2nd",
+     )
+     prb_lev10_usage_df = analyze_prb_usage(
+         df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
+         number_of_kpi_days=number_of_kpi_days,
+         prb_usage_threshold=prb_usage_threshold,
+         analysis_type="BH",
+         number_of_threshold_days=number_of_threshold_days,
+         suffix="" if main_prb_to_use == "DL PRB Util p TTI Lev_10" else "_2nd",
+     )
+     act_ues_df = pivoted_kpi_dfs["Avg_act_UEs_DL"]
+     # Compute max and avg before appending them, so the average is not
+     # skewed by the freshly added max column
+     act_ues_max = act_ues_df.max(axis=1)
+     act_ues_avg = act_ues_df.mean(axis=1)
+     act_ues_df["max_act_ues"] = act_ues_max
+     act_ues_df["avg_act_ues"] = act_ues_avg
+     dl_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_DL"]
+     dl_thp_max = dl_thp_df.max(axis=1)
+     dl_thp_avg = dl_thp_df.mean(axis=1)
+     dl_thp_df["max_dl_thp"] = dl_thp_max
+     dl_thp_df["avg_dl_thp"] = dl_thp_avg
+     ul_thp_df = pivoted_kpi_dfs["Avg_PDCP_cell_thp_UL"]
+     ul_thp_max = ul_thp_df.max(axis=1)
+     ul_thp_avg = ul_thp_df.mean(axis=1)
+     ul_thp_df["max_ul_thp"] = ul_thp_max
+     ul_thp_df["avg_ul_thp"] = ul_thp_avg
+
+     bh_kpi_df = pd.concat(
+         [
+             cell_availability_df,
+             prb_lev10_usage_df,
+             prb_usage_df,
+             act_ues_df,
+             dl_thp_df,
+             ul_thp_df,
+         ],
+         axis=1,
+     )
+     bh_kpi_df = bh_kpi_df.reset_index()
+     prb_df = bh_kpi_df[PRB_COLUMNS]
+
+     # Drop rows where LNCEL_name is empty or a placeholder (3 characters or fewer)
+     prb_df = prb_df[prb_df["LNCEL_name"].str.len() > 3]
+     # prb_df = prb_df.reset_index()
+     prb_df = prb_df.droplevel(level=1, axis=1)  # Drop the second column level (date)
+     # prb_df = prb_df.reset_index()
+     # prb_df["code"] = prb_df["LNCEL_name"].str.split("_").str[0]
+
+     lte_db = lte_database_for_capacity(dump_path)
+
+     db_and_prb = pd.merge(lte_db, prb_df, on="LNCEL_name", how="left")
+
+     # if avg_prb_usage_bh is "" then set it to "cell exists in dump but not in BH report"
+     # db_and_prb.loc[db_and_prb["avg_prb_usage_bh"].isnull(), "avg_prb_usage_bh"] = (
+     #     "cell exists in dump but not in BH report"
+     # )
+     # Drop rows where LNCEL_name is empty or a placeholder (3 characters or fewer)
+     db_and_prb = db_and_prb[db_and_prb["LNCEL_name"].str.len() > 3]
+
+     lte_analysis_df = dfs_per_band_cell(db_and_prb)
+     lte_analysis_df = lte_analysis_logic(
+         lte_analysis_df,
+         prb_usage_threshold,
+         prb_diff_between_cells_threshold,
+     )
+
+     lte_analysis_df = lte_analysis_df[LTE_ANALYSIS_COLUMNS]
+     # Rename columns
+     lte_analysis_df = lte_analysis_df.rename(
+         columns={
+             "LNCEL_name_l800": "name_l800",
+             "LNCEL_name_l1800": "name_l1800",
+             "LNCEL_name_l2300": "name_l2300",
+             "LNCEL_name_l2600": "name_l2600",
+             "LNCEL_name_l1800s": "name_l1800s",
+             "avg_prb_usage_bh_l800": "prb_l800",
+             "avg_prb_usage_bh_l1800": "prb_l1800",
+             "avg_prb_usage_bh_l2300": "prb_l2300",
+             "avg_prb_usage_bh_l2600": "prb_l2600",
+             "avg_prb_usage_bh_l1800s": "prb_l1800s",
+             "avg_prb_usage_bh_l800_2nd": "prb_l800_2nd",
+             "avg_prb_usage_bh_l1800_2nd": "prb_l1800_2nd",
+             "avg_prb_usage_bh_l2300_2nd": "prb_l2300_2nd",
+             "avg_prb_usage_bh_l2600_2nd": "prb_l2600_2nd",
+             "avg_prb_usage_bh_l1800s_2nd": "prb_l1800s_2nd",
+             "avg_act_ues_l800": "act_ues_l800",
+             "avg_act_ues_l1800": "act_ues_l1800",
+             "avg_act_ues_l2300": "act_ues_l2300",
+             "avg_act_ues_l2600": "act_ues_l2600",
+             "avg_act_ues_l1800s": "act_ues_l1800s",
+             "avg_dl_thp_l800": "dl_thp_l800",
+             "avg_dl_thp_l1800": "dl_thp_l1800",
+             "avg_dl_thp_l2300": "dl_thp_l2300",
+             "avg_dl_thp_l2600": "dl_thp_l2600",
+             "avg_dl_thp_l1800s": "dl_thp_l1800s",
+             "avg_ul_thp_l800": "ul_thp_l800",
+             "avg_ul_thp_l1800": "ul_thp_l1800",
+             "avg_ul_thp_l2300": "ul_thp_l2300",
+             "avg_ul_thp_l2600": "ul_thp_l2600",
+             "avg_ul_thp_l1800s": "ul_thp_l1800s",
+         }
+     )
+
+     return [bh_kpi_df, lte_analysis_df]
+
+
+ def process_lte_bh_report(
+     dump_path: str,
+     bh_report_path: str,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: float,
+     prb_usage_threshold: float,
+     prb_diff_between_cells_threshold: float,
+     main_prb_to_use: str,
+ ) -> list:
+     """
+     Process the LTE busy-hour report and perform capacity analysis.
+
+     Args:
+         dump_path: Path to the network dump used to build the LTE database
+         bh_report_path: Path to the BH report CSV file
+         num_last_days: Number of last days for analysis
+         num_threshold_days: Number of days for threshold calculation
+         availability_threshold: Minimum required availability
+         prb_usage_threshold: Maximum allowed PRB usage
+         prb_diff_between_cells_threshold: Maximum allowed PRB usage difference between cells
+         main_prb_to_use: Name of the PRB KPI to treat as the primary one
+
+     Returns:
+         List containing the BH KPI DataFrame and the LTE analysis DataFrame
+     """
+     LteCapacity.final_results = None
+     # lte_db_dfs = lte_database_for_capacity(dump_path)
+
+     # Read BH report
+     df = pd.read_csv(bh_report_path, delimiter=";")
+     df = kpi_naming_cleaning(df)
+     # print(df.columns)
+     df = create_hourly_date(df)
+     df = df[KPI_COLUMNS]
+     pivoted_kpi_dfs = lte_bh_dfs_per_kpi(
+         dump_path=dump_path,
+         df=df,
+         number_of_kpi_days=num_last_days,
+         availability_threshold=availability_threshold,
+         prb_usage_threshold=prb_usage_threshold,
+         prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
+         number_of_threshold_days=num_threshold_days,
+         main_prb_to_use=main_prb_to_use,
+     )
+
+     # save_dataframe(pivoted_kpi_dfs, "LTE_BH_Report.csv")
+     return pivoted_kpi_dfs
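A hypothetical end-to-end invocation of the BH processing; every path, threshold, and the KPI name below are placeholders, not values mandated by the tool:

```python
bh_kpi_df, lte_analysis_df = process_lte_bh_report(
    dump_path="dump.xlsb",
    bh_report_path="lte_bh_report.csv",
    num_last_days=7,
    num_threshold_days=3,
    availability_threshold=95,
    prb_usage_threshold=80,
    prb_diff_between_cells_threshold=20,
    main_prb_to_use="E-UTRAN Avg PRB usage per TTI DL",
)
print(lte_analysis_df["final_comments"].value_counts())
```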
process_kpi/process_wbts_capacity.py ADDED
@@ -0,0 +1,312 @@
+ import pandas as pd
+
+ from utils.kpi_analysis_utils import (
+     cell_availability_analysis,
+     combine_comments,
+     create_daily_date,
+     create_dfs_per_kpi,
+     kpi_naming_cleaning,
+ )
+
+
+ class WbtsCapacity:
+     final_results: pd.DataFrame = None
+
+
+ def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str:
+     """
+     Check if any value in the row deviates more than max_diff from the most common value.
+
+     Args:
+         row: Series of values to check for deviation
+         max_diff: Maximum allowed difference from the most common value
+         type: Type identifier for the deviation message
+
+     Returns:
+         A message indicating deviation if found, otherwise an empty string
+     """
+     numeric_row = row.astype(float)  # Ensure numeric
+     mode_series = numeric_row.mode()
+
+     # Safe fallback in case mode is empty
+     most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0]
+
+     diffs = abs(numeric_row - most_common)
+
+     if (diffs > max_diff).any():
+         return f"{type} Deviation > {max_diff} detected"
+     else:
+         return ""
+
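A quick sketch of the deviation rule on invented values: the mode of the recent days is the reference, and any day farther than `max_diff` from it trips the message.

```python
import pandas as pd

row = pd.Series([6, 6, 12])  # three days of subunit counts, one day deviating
print(check_deviation(row, max_diff=3.0, type="bb"))
# -> "bb Deviation > 3.0 detected"  (mode is 6, and |12 - 6| > 3)
```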
+ def max_used_bb_subunits_analysis(
+     df: pd.DataFrame,
+     days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Analyze maximum used baseband subunits and identify sites needing upgrades.
+
+     Args:
+         df: DataFrame containing baseband utilization data
+         days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with analysis results and upgrade recommendations
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     last_days_df = last_days_df.fillna(0)
+
+     result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2)
+     # Count the number of days above threshold
+     result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply(
+         lambda row: sum(1 for x in row if x >= threshold), axis=1
+     )
+
+     # Initialize comment column
+     result_df["Average_used_bb_ratio_comment"] = ""
+
+     # Apply condition for upgrade recommendation
+     result_df.loc[
+         (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
+         & (result_df["Average_used_bb_ratio"] >= threshold),
+         "Average_used_bb_ratio_comment",
+     ] = "need BB upgrade"
+
+     return result_df
+
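The flagging rule requires both conditions at once: at least `number_of_threshold_days` days at or above the threshold, and a period average at or above it. A minimal sketch with one invented site and seven daily values:

```python
import pandas as pd

util = pd.DataFrame([[82, 85, 90, 79, 81, 88, 84]])  # one site, 7 days
flagged = max_used_bb_subunits_analysis(
    util, days=7, threshold=80, number_of_threshold_days=3
)
# 6 of 7 days are >= 80 and the mean (84.14) is >= 80, so the site is flagged:
print(flagged["Average_used_bb_ratio_comment"].iloc[0])  # -> "need BB upgrade"
```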
+ def max_used_ce_analysis(
+     df: pd.DataFrame,
+     days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Analyze maximum used channel elements and identify sites needing upgrades.
+
+     Args:
+         df: DataFrame containing channel element utilization data
+         days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with analysis results and upgrade recommendations
+     """
+     result_df = df.copy().fillna(0)
+     last_days_df = result_df.iloc[:, -days:]
+
+     result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2)
+
+     # Count the number of days above threshold
+     result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply(
+         lambda row: sum(1 for x in row if x >= threshold), axis=1
+     )
+
+     # Initialize comment column
+     result_df["Average_used_ce_ratio_comment"] = ""
+
+     # Apply condition for upgrade recommendation
+     result_df.loc[
+         (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days)
+         & (result_df["Average_used_ce_ratio"] >= threshold),
+         "Average_used_ce_ratio_comment",
+     ] = "need CE upgrade"
+
+     return result_df
+
+
+ def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame:
+     """
+     Analyze baseband subunit count for deviations.
+
+     Args:
+         df: DataFrame containing baseband subunit count data
+         days: Number of days to analyze
+
+     Returns:
+         DataFrame with deviation analysis comments
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     result_df["num_bb_subunits_comment"] = last_days_df.apply(
+         lambda row: check_deviation(row, type="bb"), axis=1
+     )
+     return result_df
+
+
+ def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame:
+     """
+     Analyze available channel elements for deviations.
+
+     Args:
+         df: DataFrame containing available channel element data
+         days: Number of days to analyze
+
+     Returns:
+         DataFrame with deviation analysis comments
+     """
+     result_df = df.copy()
+     last_days_df = result_df.iloc[:, -days:]
+     result_df["avail_ce_comment"] = last_days_df.apply(
+         lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1
+     )
+     return result_df
+
+
+ def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Combine baseband related comments into a single column.
+
+     Args:
+         df: DataFrame containing baseband comment columns
+
+     Returns:
+         DataFrame with combined baseband comments
+     """
+     return combine_comments(
+         df,
+         "num_bb_subunits_comment",
+         "Average_used_bb_ratio_comment",
+         "availability_comment_daily",
+         new_column="bb_comments",
+     )
+
+
+ def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Combine channel element related comments into a single column.
+
+     Args:
+         df: DataFrame containing channel element comment columns
+
+     Returns:
+         DataFrame with combined channel element comments
+     """
+     return combine_comments(
+         df,
+         "avail_ce_comment",
+         "Average_used_ce_ratio_comment",
+         "availability_comment_daily",
+         new_column="ce_comments",
+     )
+
+
+ def wbts_kpi_analysis(
+     df: pd.DataFrame,
+     num_days: int = 7,
+     threshold: int = 80,
+     number_of_threshold_days: int = 3,
+ ) -> pd.DataFrame:
+     """
+     Create pivoted DataFrames for each KPI and perform analysis.
+
+     Args:
+         df: DataFrame containing KPI data
+         num_days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with combined analysis results
+     """
+     # kpi_columns = df.columns[5:]
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5
+     )
+
+     # Extract individual KPI DataFrames
+     wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0]
+     licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"]
+     max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"]
+     max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"]
+     max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"]
+     max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"]
+     num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"]
+     max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"]
+     cell_availability_df = pivoted_kpi_dfs[
+         "Cell_Availability_excluding_blocked_by_user_state_BLU"
+     ]
+     total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
+     total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"]
+     max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"]
+
+     # Perform analysis on each KPI DataFrame
+     max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis(
+         max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days
+     )
+     cell_availability_df = cell_availability_analysis(cell_availability_df, num_days)
+     max_used_ce_ratio_flexi_df = max_used_ce_analysis(
+         max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days
+     )
+     num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days)
+     licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days)
+
+     # Concatenate all DataFrames
+     result_df = pd.concat(
+         [
+             wbts_name_df,
+             licensed_ce_df,
+             max_used_ce_dl_df,
+             max_used_ce_ul_df,
+             max_avail_ce_df,
+             max_used_bb_subunits_df,
+             num_bb_subunits_df,
+             max_bb_sus_util_ratio_df,
+             cell_availability_df,
+             total_cs_traffic_df,
+             total_data_traffic_df,
+             max_used_ce_ratio_flexi_df,
+         ],
+         axis=1,
+     )
+
+     # Add combined comments analysis
+     result_df = bb_comments_analysis(result_df)
+     result_df = ce_comments_analysis(result_df)
+
+     return result_df
+
+
+ def load_data(
+     filepath: str,
+     num_days: int,
+     threshold: int,
+     number_of_threshold_days: int,
+ ) -> pd.DataFrame:
+     """
+     Load data from CSV file and perform preprocessing and analysis.
+
+     Args:
+         filepath: Path to CSV file or uploaded file object
+         num_days: Number of days to analyze
+         threshold: Utilization threshold percentage for flagging
+         number_of_threshold_days: Minimum days above threshold to flag for upgrade
+
+     Returns:
+         DataFrame with processed and analyzed data
+     """
+     df = pd.read_csv(filepath, delimiter=";")
+
+     # Preprocess data
+     df = create_daily_date(df)
+     df = kpi_naming_cleaning(df)
+
+     # Reorder columns for better organization
+     df = df[["date"] + [col for col in df.columns if col not in ["date"]]]
+     df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]]
+
+     # Perform KPI analysis
+     df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days)
+
+     # for col, col_index in zip(df.columns, df.columns.get_indexer(df.columns)):
+     #     print(f"Column: {col}, Index: {col_index}")
+
+     return df
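A hypothetical call of the whole WBTS pipeline; the CSV file name and the settings are placeholders:

```python
wbts_df = load_data(
    "wbts_daily_report.csv", num_days=7, threshold=80, number_of_threshold_days=3
)
print(wbts_df[["bb_comments", "ce_comments"]].head())
```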
process_kpi/process_wcel_capacity.py ADDED
@@ -0,0 +1,348 @@
+ import pandas as pd
+
+ from utils.kpi_analysis_utils import (
+     analyze_fails_kpi,
+     cell_availability_analysis,
+     combine_comments,
+     create_daily_date,
+     create_dfs_per_kpi,
+     kpi_naming_cleaning,
+     summarize_fails_comments,
+ )
+ from utils.utils_vars import get_physical_db
+
+ tx_comments_mapping = {
+     "iub_frameloss exceeded threshold": "iub frameloss",
+     "iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion",
+     "hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion",
+ }
+ operational_comments_mapping = {
+     "Down Site": "Down Cell",
+     "iub frameloss, instability": "Availability and TX issues",
+     "iub frameloss and hsdpa iub congestion, Availability OK": "TX issues",
+     "iub frameloss, Availability OK": "TX issues",
+     "critical instability": "Availability issues",
+     "iub frameloss, critical instability": "Availability and TX issues",
+     "iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues",
+     "Availability OK": "Site OK",
+     "hsdpa iub congestion, instability": "Availability and TX issues",
+     "instability": "Availability issues",
+     "hsdpa iub congestion, Availability OK": "TX issues",
+     "iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues",
+     "hsdpa iub congestion, critical instability": "Availability and TX issues",
+ }
+
+ fails_comments_mapping = {
+     "ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
+     "bts fails": "Bts fails",
+     "ac, bts, code fails": "Power and Code fails",
+     "ac, code fails": "Power fails",
+     "ac fails": "Power fails",
+     "ac, ac_dl fails": "Power fails",
+     "ac, bts fails": "Power and Bts fails",
+     "ac, ac_dl, bts fails": "Power and Bts fails",
+     "ac, ac_dl, code fails": "Power and Code fails",
+     "ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
+     "ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
+ }
+
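These mapping dicts are plain normalization tables from raw combined comments to short labels. Later in the file they are applied with `Series.apply` and `dict.get`, which passes unknown keys through unchanged. A small sketch with one invented unmapped value:

```python
import pandas as pd

raw = pd.Series([
    "iub_frameloss exceeded threshold",
    "some unmapped combination",
])
print(raw.apply(lambda x: tx_comments_mapping.get(x, x)).tolist())
# -> ['iub frameloss', 'some unmapped combination']
```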
+ KPI_COLUMNS = [
+     "WCEL_name",
+     "date",
+     "Cell_Availability_excluding_blocked_by_user_state_BLU",
+     "Total_CS_traffic_Erl",
+     "HSDPA_TRAFFIC_VOLUME",
+     "HSDPA_USER_THROUGHPUT",
+     "Max_simult_HSDPA_users",
+     "IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71",
+     "HSDPA_congestion_rate_in_Iub",
+     "rrc_conn_stp_fail_ac_M1001C3",
+     "RRC_CONN_STP_FAIL_AC_UL_M1001C731",
+     "RRC_CONN_STP_FAIL_AC_DL_M1001C732",
+     "RRC_CONN_STP_FAIL_AC_COD_M1001C733",
+     "rrc_conn_stp_fail_bts_M1001C4",
+ ]
+
+ WCEL_ANALYSIS_COLUMNS = [
+     "WCEL_name",
+     "Average_cell_availability_daily",
+     "number_of_days_exceeding_availability_threshold_daily",
+     "availability_comment_daily",
+     "sum_traffic_cs",
+     "sum_traffic_dl",
+     "max_dl_throughput",
+     "avg_dl_throughput",
+     "max_users",
+     "max_iub_frameloss",
+     "number_of_days_with_iub_frameloss_exceeded",
+     "max_hsdpa_congestion_rate_iub",
+     "number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
+     "max_rrc_fail_ac",
+     "number_of_days_with_rrc_fail_ac_exceeded",
+     "max_rrc_fail_ac_ul",
+     "number_of_days_with_rrc_fail_ac_ul_exceeded",
+     "max_rrc_fail_ac_dl",
+     "number_of_days_with_rrc_fail_ac_dl_exceeded",
+     "max_rrc_fail_code",
+     "number_of_days_with_rrc_fail_code_exceeded",
+     "max_rrc_fail_bts",
+     "number_of_days_with_rrc_fail_bts_exceeded",
+     "tx_congestion_comments",
+     "operational_comments",
+     "fails_comments",
+     "final_comments",
+ ]
+
+
+ class WcelCapacity:
+     final_results: pd.DataFrame = None
+
+
+ def wcel_kpi_analysis(
+     df: pd.DataFrame,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: int,
+     iub_frameloss_threshold: int,
+     hsdpa_congestion_rate_iub_threshold: int,
+     fails_treshold: int,
+ ) -> list:
+     pivoted_kpi_dfs = create_dfs_per_kpi(
+         df=df,
+         pivot_date_column="date",
+         pivot_name_column="WCEL_name",
+         kpi_columns_from=2,
+     )
+     cell_availability_df = cell_availability_analysis(
+         df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"],
+         days=num_last_days,
+         availability_threshold=availability_threshold,
+     )
+
+     # Traffic, throughput and max users
+     trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
+     hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"]
+     hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
+     max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
+     # Add sums, maxima and averages; the stats are computed before the new
+     # columns are appended, so the averages are not skewed by the max columns
+     trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
+     hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
+     dl_thp_max = hsdpa_user_throughput_df.max(axis=1)
+     dl_thp_avg = hsdpa_user_throughput_df.mean(axis=1)
+     hsdpa_user_throughput_df["max_dl_throughput"] = dl_thp_max
+     hsdpa_user_throughput_df["avg_dl_throughput"] = dl_thp_avg
+     users_max = max_simult_hsdpa_users_df.max(axis=1)
+     users_avg = max_simult_hsdpa_users_df.mean(axis=1)
+     max_simult_hsdpa_users_df["max_users"] = users_max
+     max_simult_hsdpa_users_df["avg_users"] = users_avg
+
+     # TX Congestion
+     iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"]
+     hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"]
+
+     iub_frameloss_df = analyze_fails_kpi(
+         df=iub_frameloss_df,
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=iub_frameloss_threshold,
+         kpi_column_name="iub_frameloss",
+     )
+     hsdpa_congestion_rate_iub_df = analyze_fails_kpi(
+         df=hsdpa_congestion_rate_iub_df,
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=hsdpa_congestion_rate_iub_threshold,
+         kpi_column_name="hsdpa_congestion_rate_iub",
+     )
+
+     # Fails
+     rrc_conn_stp_fail_ac_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac",
+     )
+     rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac_ul",
+     )
+     rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_ac_dl",
+     )
+     rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_code",
+     )
+     rrc_conn_stp_fail_bts_df = analyze_fails_kpi(
+         df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"],
+         number_of_kpi_days=num_last_days,
+         number_of_threshold_days=num_threshold_days,
+         kpi_threshold=fails_treshold,
+         kpi_column_name="rrc_fail_bts",
+     )
+
+     kpi_df = pd.concat(
+         [
+             cell_availability_df,
+             trafic_cs_df,
+             hsdpa_traffic_df,
+             hsdpa_user_throughput_df,
+             max_simult_hsdpa_users_df,
+             iub_frameloss_df,
+             hsdpa_congestion_rate_iub_df,
+             rrc_conn_stp_fail_ac_df,
+             rrc_conn_stp_fail_ac_ul_df,
+             rrc_conn_stp_fail_ac_dl_df,
+             rrc_conn_stp_fail_ac_cod_df,
+             rrc_conn_stp_fail_bts_df,
+         ],
+         axis=1,
+     )
+     kpi_df = kpi_df.reset_index()
+
+     kpi_df = combine_comments(
+         kpi_df,
+         "iub_frameloss_comment",
+         "hsdpa_congestion_rate_iub_comment",
+         new_column="tx_congestion_comments",
+     )
+     kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply(
+         lambda x: tx_comments_mapping.get(x, x)
+     )
+
+     kpi_df = combine_comments(
+         kpi_df,
+         "tx_congestion_comments",
+         "availability_comment_daily",
+         new_column="operational_comments",
+     )
+     kpi_df["operational_comments"] = kpi_df["operational_comments"].apply(
+         lambda x: operational_comments_mapping.get(x, x)
+     )
+     kpi_df = combine_comments(
+         kpi_df,
+         "rrc_fail_ac_comment",
+         "rrc_fail_ac_ul_comment",
+         "rrc_fail_ac_dl_comment",
+         "rrc_fail_code_comment",
+         "rrc_fail_bts_comment",
+         new_column="fails_comments",
+     )
+     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
+     kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
+         lambda x: fails_comments_mapping.get(x, x)
+     )
+     kpi_df = combine_comments(
+         kpi_df,
+         "operational_comments",
+         "fails_comments",
+         new_column="final_comments",
+     )
+
+     wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
+     wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)  # Drop the date level
+
+     # Rename
+     wcel_analysis_df = wcel_analysis_df.rename(
+         columns={
+             "WCEL_name": "name",
+             "Average_cell_availability_daily": "Avg_availability",
+             "number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
+             "availability_comment_daily": "availability_comment",
+             "number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
+             "number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
+             "number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
+             "number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
+             "number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
+             "number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
+             "number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
+         }
+     )
+     # Drop rows whose name has fewer than 5 characters
+     wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]
+
+     wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
+     wcel_analysis_df["code"] = (
+         pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
+     )
+     wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
+     # Move code and Region to the first columns
+     wcel_analysis_df = wcel_analysis_df[
+         ["code", "Region"]
+         + [col for col in wcel_analysis_df if col != "code" and col != "Region"]
+     ]
+
+     # Load physical database
+     physical_db: pd.DataFrame = get_physical_db()
+
+     # Convert code_sector to code
+     physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
+     # Remove duplicates
+     physical_db = physical_db.drop_duplicates(subset="code")
+
+     # Keep only code, longitude, latitude and city
+     physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
+
+     physical_db["code"] = (
+         pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
+     )
+
+     wcel_analysis_df = pd.merge(
+         wcel_analysis_df,
+         physical_db,
+         on="code",
+         how="left",
+     )
+
+     return [wcel_analysis_df, kpi_df]
+
+
+ def load_and_process_wcel_capacity_data(
+     uploaded_file: str,
+     num_last_days: int,
+     num_threshold_days: int,
+     availability_threshold: int,
+     iub_frameloss_threshold: int,
+     hsdpa_congestion_rate_iub_threshold: int,
+     fails_treshold: int,
+ ) -> list:
+     """
+     Load and process data for WCEL capacity analysis.
+
+     Args:
+         uploaded_file: Path to (or file-like object for) the CSV of WCEL capacity data
+         num_last_days: Number of days for analysis
+         num_threshold_days: Minimum days above threshold to flag for upgrade
+         availability_threshold: Minimum required cell availability
+         iub_frameloss_threshold: Threshold for Iub frame loss
+         hsdpa_congestion_rate_iub_threshold: Threshold for HSDPA congestion rate in Iub
+         fails_treshold: Threshold for RRC setup failure counts
+
+     Returns:
+         List with the WCEL analysis DataFrame and the per-KPI DataFrame
+     """
+     # Load data
+     df = pd.read_csv(uploaded_file, delimiter=";")
+     df = kpi_naming_cleaning(df)
+     df = create_daily_date(df)
+     df = df[KPI_COLUMNS]
+     dfs = wcel_kpi_analysis(
+         df,
+         num_last_days,
+         num_threshold_days,
+         availability_threshold,
+         iub_frameloss_threshold,
+         hsdpa_congestion_rate_iub_threshold,
+         fails_treshold,
+     )
+     return dfs
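A hypothetical end-to-end call; the file name and every threshold below are placeholders, not recommended values:

```python
wcel_analysis_df, kpi_df = load_and_process_wcel_capacity_data(
    "wcel_daily_report.csv",
    num_last_days=7,
    num_threshold_days=3,
    availability_threshold=95,
    iub_frameloss_threshold=2,
    hsdpa_congestion_rate_iub_threshold=5,
    fails_treshold=100,
)
print(wcel_analysis_df["final_comments"].value_counts().head())
```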
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ panel>=1.4
+ bokeh>=3.4
+ pandas>=2.0
+ numpy>=1.23
+ plotly>=5.0
+ xlsxwriter>=3.0
+
+ pyarrow>=14.0
+ duckdb>=0.9
+ openpyxl>=3.1
utils/azimuth_validation.py ADDED
@@ -0,0 +1,35 @@
+ import pandas as pd
+
+ from utils.convert_to_excel import save_dataframe
+
+ url = r"./physical_db/physical_database.csv"
+
+ df = pd.read_csv(url)
+
+
+ def validate_azimuth(group):
+     """
+     Validate the azimuth ordering within a group.
+
+     Checks that the azimuth values are strictly increasing when a group has
+     exactly three values, i.e. Sector 3's azimuth is greater than Sector 2's,
+     which in turn is greater than Sector 1's.
+
+     Args:
+         group (pd.DataFrame): A DataFrame group containing an 'Azimut' column.
+
+     Returns:
+         bool: True if the azimuth values are strictly increasing when there
+         are exactly three values, False otherwise.
+     """
+
+     # Fall back to an empty Series (not a list) so .values always exists
+     azimuths = group.get("Azimut", pd.Series(dtype=float)).values
+     if len(azimuths) == 3 and not (azimuths[0] < azimuths[1] < azimuths[2]):
+         return False
+     return True
+
+
+ # Apply validation per 'code'
+ azimut_verification = df.groupby("CODE").apply(lambda x: validate_azimuth(x))
+ df["Azimut_verification"] = df["CODE"].map(azimut_verification)
+
+ save_dataframe(df, "azimut_verification")
+ # print(df)
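A quick check of the rule with invented azimuths, one ordered triple and one out-of-order triple:

```python
import pandas as pd

ok = pd.DataFrame({"Azimut": [0, 120, 240]})
bad = pd.DataFrame({"Azimut": [120, 0, 240]})
print(validate_azimuth(ok), validate_azimuth(bad))  # -> True False
```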
utils/check_sheet_exist.py ADDED
@@ -0,0 +1,90 @@
+ import pandas as pd
+
+
+ class DumpType:
+     full_dump = False
+
+
+ class Technology:
+     gsm = False
+     wcdma = False
+     lte = False
+     neighbors = False
+     trx = False
+     mrbts = False
+     mal = False
+     invunit = False
+
+
+ # Dictionary of sheet groups to check
+ sheets_to_check = {
+     "gsm": ["BTS", "BCF", "TRX", "MAL"],
+     "neighbors": ["ADCE", "ADJS", "ADJI", "ADJG", "ADJW", "BTS", "WCEL"],
+     "wcdma": ["WCEL", "WBTS", "WNCEL"],
+     "lte": ["LNBTS", "LNCEL", "LNCEL_FDD", "LNCEL_TDD"],
+     "trx": ["TRX", "BTS"],
+     "mrbts": ["MRBTS"],
+     "mal": ["MAL", "BTS"],
+     "invunit": ["INVUNIT"],
+ }
+
+
+ def load(file_path):
+     # Load the Excel file
+     xlsb_file = pd.ExcelFile(file_path, engine="calamine")
+
+     # Get all sheet names in the file
+     available_sheets = xlsb_file.sheet_names
+     return available_sheets
+
+
+ def check_sheets(technology_attr, sheet_list, file_path):
+     """
+     Check if all sheets in the given sheet_list exist in the Excel file.
+
+     Parameters
+     ----------
+     technology_attr : str
+         The attribute of the Technology class to set.
+     sheet_list : list[str]
+         The list of sheet names to check.
+     file_path : str
+         Path to the Excel workbook to inspect.
+
+     Returns
+     -------
+     None
+     """
+     available_sheets = load(file_path)
+     missing_sheets = [sheet for sheet in sheet_list if sheet not in available_sheets]
+     available_sheets_in_list = [
+         sheet for sheet in sheet_list if sheet in available_sheets
+     ]
+     if not missing_sheets:
+         setattr(Technology, technology_attr, True)
+         # print(getattr(Technology, technology_attr))
+         # print(f"available:", available_sheets_in_list)
+         # print("All sheets exist")
+
+     # else:
+     #     print(f"Missing sheets: {missing_sheets}")
+     #     print(f"available:", available_sheets_in_list)
+     #     print(getattr(Technology, technology_attr))
+
+
+ # Check each technology's sheets
+ def execute_checks_sheets_exist(file_path):
+     Technology.gsm = False
+     Technology.wcdma = False
+     Technology.lte = False
+     Technology.neighbors = False
+     Technology.trx = False
+     Technology.mrbts = False
+     Technology.invunit = False
+     Technology.mal = False
+     DumpType.full_dump = False
+     for tech_attr, sheets in sheets_to_check.items():
+         check_sheets(tech_attr, sheets, file_path)
+
+
+ # execute_checks_sheets_exist(
+ #     r"C:\Users\David\Documents\PROJECTS\2023\PROJET 2023\DUMP\DUMP\2142\DUMP 2142.xlsb"
+ # )
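A hedged usage sketch (the dump path is a placeholder; `load` relies on pandas' `calamine` engine, so the python-calamine package must be available):

```python
execute_checks_sheets_exist("DUMP.xlsb")
print(Technology.gsm, Technology.wcdma, Technology.lte)  # flags per sheet group
```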
utils/config_band.py ADDED
@@ -0,0 +1,156 @@
+ import pandas as pd
+
+
+ def config_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the site configuration band for each site code.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the site information, with columns "code" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the site configuration band for each site code,
+         with columns "code" and "site_config_band"
+     """
+     df_band = df[["code", "band"]].copy()
+     df_band["ID"] = df_band[["code", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["code", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("code")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "site_config_band"}, inplace=True)
+
+     return df_band
+
+
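A tiny illustration of the groupby/sort/join step with invented codes and bands:

```python
import pandas as pd

sites = pd.DataFrame({"code": [1, 1, 2], "band": ["G900", "L1800", "G900"]})
print(config_band(sites))
#    code site_config_band
# 0     1       G900/L1800
# 1     2             G900
```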
+ def bcf_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the bcf configuration band for each bcf ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the bcf information, with columns "ID_BCF" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the bcf configuration band for each bcf ID,
+         with columns "ID_BCF" and "bcf_config_band"
+     """
+     df_band = df[["ID_BCF", "band"]].copy()
+     df_band["ID"] = df_band[["ID_BCF", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["ID_BCF", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("ID_BCF")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "bcf_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def wbts_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the wbts configuration band for each wbts ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the wbts information, with columns "WBTS" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the wbts configuration band for each wbts ID,
+         with columns "WBTS" and "wbts_config_band"
+     """
+     df_band = df[["WBTS", "band"]].copy()
+     df_band["ID"] = df_band[["WBTS", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["WBTS", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("WBTS")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "wbts_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def lte_mrbts_band(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the LTE configuration band for each mrbts ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the mrbts information, with columns "MRBTS" and "band"
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the LTE configuration band for each mrbts ID,
+         with columns "MRBTS" and "lte_config_band"
+     """
+     df_band = df[["MRBTS", "band"]].copy()
+     df_band["ID"] = df_band[["MRBTS", "band"]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[["MRBTS", "band"]]
+     df_band["band"] = df_band["band"].fillna("empty")
+     df_band = (
+         df_band.groupby("MRBTS")["band"]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={"band": "lte_config_band"}, inplace=True)
+
+     return df_band
+
+
+ def adjl_band(df: pd.DataFrame, id_col: str, band_col: str) -> pd.DataFrame:
+     """
+     Create a dataframe that contains the adjl configuration band for each adjl ID.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         The dataframe containing the adjl information, with the ID and band columns
+         named by id_col and band_col
+
+     Returns
+     -------
+     pd.DataFrame
+         The dataframe containing the adjl configuration band for each adjl ID,
+         with columns id_col and "adjl_created_band"
+     """
+     df_band = df[[id_col, band_col]].copy()
+     df_band["ID"] = df_band[[id_col, band_col]].astype(str).apply("_".join, axis=1)
+     # Remove duplicate IDs
+     df_band = df_band.drop_duplicates(subset=["ID"])
+     df_band = df_band[[id_col, band_col]]
+     df_band[band_col] = df_band[band_col].fillna("empty")
+     df_band = (
+         df_band.groupby(id_col)[band_col]
+         .apply(lambda x: "/".join(sorted(x)))
+         .reset_index()
+     )
+     # Rename band to config
+     df_band.rename(columns={band_col: "adjl_created_band"}, inplace=True)
+
+     return df_band
utils/convert_to_excel.py ADDED
@@ -0,0 +1,365 @@
+ import io
+ import time
+
+ import pandas as pd
+ import streamlit as st
+
+ # @st.cache_data
+ # def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
+ #     # IMPORTANT: Cache the conversion to prevent computation on every rerun
+
+ #     # Create a BytesIO object
+ #     bytes_io = io.BytesIO()
+
+ #     # Write the dataframes to the BytesIO object
+ #     with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
+ #         for df, sheet_name in zip(dfs, sheet_names):
+ #             df.to_excel(writer, sheet_name=sheet_name, index=True)
+
+ #     # Get the bytes data
+ #     bytes_data = bytes_io.getvalue()
+
+ #     # Close the BytesIO object
+ #     bytes_io.close()
+
+ #     return bytes_data
+
+
+ def get_formats(workbook):
+     return {
+         "green": workbook.add_format(
+             {"bg_color": "#37CC73", "bold": True, "border": 1}
+         ),
+         "green_light": workbook.add_format(
+             {"bg_color": "#87E0AB", "bold": True, "border": 1}
+         ),
+         "blue": workbook.add_format({"bg_color": "#1A64FF", "bold": True, "border": 1}),
+         "blue_light": workbook.add_format(
+             {"bg_color": "#00B0F0", "bold": True, "border": 1}
+         ),
+         "beurre": workbook.add_format(
+             {"bg_color": "#FFE699", "bold": True, "border": 1}
+         ),
+         "orange": workbook.add_format(
+             {"bg_color": "#F47F31", "bold": True, "border": 1}
+         ),
+         "purple5": workbook.add_format(
+             {"bg_color": "#E03DCD", "bold": True, "border": 1}
+         ),
+         "purple6": workbook.add_format(
+             {"bg_color": "#AE83F8", "bold": True, "border": 1}
+         ),
+         "gray": workbook.add_format({"bg_color": "#D9D9D9", "bold": True, "border": 1}),
+         "red": workbook.add_format({"bg_color": "#FF0000", "bold": True, "border": 1}),
+         "yellow": workbook.add_format(
+             {"bg_color": "#FFFF00", "bold": True, "border": 1}
+         ),
+     }
+
+
+ def get_format_map_by_format_type(formats: dict, format_type: str) -> dict:
+     if format_type == "GSM_Analysis":
+         return {
+             # "name": formats["blue"],
+             "amrSegLoadDepTchRateLower": formats["beurre"],
+             "amrSegLoadDepTchRateUpper": formats["beurre"],
+             "btsSpLoadDepTchRateLower": formats["beurre"],
+             "btsSpLoadDepTchRateUpper": formats["beurre"],
+             "amrWbFrCodecModeSet": formats["beurre"],
+             "dedicatedGPRScapacity": formats["beurre"],
+             "defaultGPRScapacity": formats["beurre"],
+             "number_trx_per_cell": formats["blue"],
+             "number_trx_per_bcf": formats["blue"],
+             "number_tch_per_cell": formats["blue"],
+             "number_sd_per_cell": formats["blue"],
+             "number_bcch_per_cell": formats["blue"],
+             "number_ccch_per_cell": formats["blue"],
+             "number_cbc_per_cell": formats["blue"],
+             "number_total_channels_per_cell": formats["blue"],
+             "number_signals_per_cell": formats["blue"],
+             "hf_rate_coef": formats["purple5"],
+             "GPRS": formats["purple5"],
+             "TCH Actual HR%": formats["green"],
+             "Offered Traffic BH": formats["green"],
+             "Max_Traffic BH": formats["green"],
+             "Avg_Traffic BH": formats["green"],
+             "TCH UTILIZATION (@Max Traffic)": formats["red"],
+             "Tch utilization comments": formats["orange"],
+             "ErlabngB_value": formats["purple6"],
+             "Target FR CHs": formats["purple6"],
+             "Target HR CHs": formats["purple6"],
+             "Target TCHs": formats["purple6"],
+             "Target TRXs": formats["purple6"],
+             "Number of required TRXs": formats["purple6"],
+             "max_tch_call_blocking_bh": formats["yellow"],
+             "avg_tch_call_blocking_bh": formats["yellow"],
+             "number_of_days_with_tch_blocking_exceeded_bh": formats["yellow"],
+             "tch_call_blocking_bh_comment": formats["orange"],
+             "max_sdcch_real_blocking_bh": formats["yellow"],
+             "avg_sdcch_real_blocking_bh": formats["yellow"],
+             "number_of_days_with_sdcch_blocking_exceeded_bh": formats["yellow"],
+             "sdcch_real_blocking_bh_comment": formats["orange"],
+             "Average_cell_availability_bh": formats["yellow"],
+             "number_of_days_exceeding_availability_threshold_bh": formats["yellow"],
+             "availability_comment_bh": formats["orange"],
+             "max_tch_abis_fail_bh": formats["yellow"],
+             "avg_tch_abis_fail_bh": formats["yellow"],
+             "number_of_days_with_tch_abis_fail_exceeded_bh": formats["yellow"],
+             "tch_abis_fail_bh_comment": formats["orange"],
+             "Average_cell_availability_daily": formats["green_light"],
+             "number_of_days_exceeding_availability_threshold_daily": formats[
+                 "green_light"
+             ],
+             "availability_comment_daily": formats["orange"],
+             "max_tch_abis_fail_daily": formats["green_light"],
+             "avg_tch_abis_fail_daily": formats["green_light"],
+             "number_of_days_with_tch_abis_fail_exceeded_daily": formats["green_light"],
+             "tch_abis_fail_daily_comment": formats["orange"],
+             "BH Congestion status": formats["gray"],
+             "operational_comment": formats["gray"],
+             "Final comment": formats["gray"],
+             "Final comment summary": formats["gray"],
+             # Operational Neighbours Distance Sheet
+             "Source_ID_BTS": formats["blue"],
+             "Source_name": formats["blue"],
+             "Source_BH Congestion status": formats["blue"],
+             "Source_Longitude": formats["blue"],
+             "Source_Latitude": formats["blue"],
+             "Neighbour_ID_BTS": formats["green_light"],
+             "Neighbour_name": formats["green_light"],
+             "Neighbour_operational_comment": formats["green_light"],
+             "Neighbour_Longitude": formats["green_light"],
+             "Neighbour_Latitude": formats["green_light"],
+             "Distance_km": formats["beurre"],
+         }
+     elif format_type == "database":
+         return {
+             "code": formats["blue"],
+             "Azimut": formats["green"],
+             "Longitude": formats["green"],
+             "Latitude": formats["green"],
+             "Hauteur": formats["green"],
+             "City": formats["green"],
+             "Adresse": formats["green"],
+             "Commune": formats["green"],
+             "Cercle": formats["green"],
+             "number_trx_per_cell": formats["blue_light"],
+             "number_trx_per_bcf": formats["blue_light"],
+             "number_trx_per_site": formats["blue_light"],
+             # invunit part in database
+             "FBBA": formats["blue_light"],
+             "FBBC": formats["blue_light"],
+             "FSMF": formats["blue_light"],
+             "ABIA": formats["blue_light"],
+             "total_number_of_subunit": formats["blue_light"],
+             "AHDA": formats["beurre"],
+             "AHEGB": formats["beurre"],
+             "AHEGC": formats["beurre"],
+             "AHEGHA": formats["beurre"],
+             "AHGA": formats["beurre"],
+             "AHMA": formats["beurre"],
+             "AHPMDA": formats["beurre"],
+             "AHPMDG": formats["beurre"],
+             "AHPMDI": formats["beurre"],
+             "ARDA": formats["beurre"],
+             "AREA": formats["beurre"],
+             "ARGA": formats["beurre"],
+             "ARMA": formats["beurre"],
+             "AZNA": formats["beurre"],
+             "FHDB": formats["beurre"],
+             "FHEB": formats["beurre"],
+             "FHEL": formats["beurre"],
+             "FRGU": formats["beurre"],
+             "FRGY": formats["beurre"],
+             "FRMB": formats["beurre"],
+             "FRMF": formats["beurre"],
+             "FXDB": formats["beurre"],
+             "FXED": formats["beurre"],
+             "FZNI": formats["beurre"],
+         }
+     elif format_type == "LTE_Analysis":
+         return {
+             "code": formats["blue"],
+             "code_sector": formats["blue"],
+             "Region": formats["blue"],
+             "site_config_band": formats["blue"],
+             "Longitude": formats["blue"],
+             "Latitude": formats["blue"],
+             # "name_l800": formats["beurre"],
+             # "name_l1800": formats["purple5"],
+             # "name_l2300": formats["purple6"],
+             # "name_l2600": formats["blue_light"],
+             # "name_l1800s": formats["gray"],
+             "prb_l800": formats["beurre"],
+             "prb_l1800": formats["beurre"],
+             "prb_l2300": formats["beurre"],
+             "prb_l2600": formats["beurre"],
+             "prb_l1800s": formats["beurre"],
+             "prb_l800_2nd": formats["purple5"],
+             "prb_l1800_2nd": formats["purple5"],
+             "prb_l2300_2nd": formats["purple5"],
+             "prb_l2600_2nd": formats["purple5"],
+             "prb_l1800s_2nd": formats["purple5"],
+             "act_ues_l800": formats["purple6"],
+             "act_ues_l1800": formats["purple6"],
+             "act_ues_l2300": formats["purple6"],
+             "act_ues_l2600": formats["purple6"],
+             "act_ues_l1800s": formats["purple6"],
+             "dl_thp_l800": formats["blue_light"],
+             "dl_thp_l1800": formats["blue_light"],
+             "dl_thp_l2300": formats["blue_light"],
+             "dl_thp_l2600": formats["blue_light"],
+             "dl_thp_l1800s": formats["blue_light"],
+             "ul_thp_l800": formats["gray"],
+             "ul_thp_l1800": formats["gray"],
+             "ul_thp_l2300": formats["gray"],
+             "ul_thp_l2600": formats["gray"],
+             "ul_thp_l1800s": formats["gray"],
+             "num_congested_cells": formats["orange"],
+             "num_cells": formats["orange"],
+             "num_cell_with_kpi": formats["orange"],
+             "num_down_or_no_kpi_cells": formats["orange"],
+             "prb_diff_between_cells": formats["orange"],
+             "load_balance_required": formats["orange"],
+             "congestion_comment": formats["orange"],
+             "final_comments": formats["green"],
+         }
+
+     elif format_type == "WCEL_capacity":
+         return {
+             "code": formats["blue"],
+             "Region": formats["blue"],
+             "name": formats["blue"],
+             "Avg_availability": formats["blue_light"],
+             "Avail_exceed_days": formats["blue_light"],
+             "availability_comment": formats["blue_light"],
+             "sum_traffic_cs": formats["beurre"],
+             "sum_traffic_dl": formats["beurre"],
+             "max_dl_throughput": formats["beurre"],
+             "avg_dl_throughput": formats["beurre"],
+             "max_users": formats["beurre"],
+             "max_iub_frameloss": formats["purple5"],
+             "iub_frameloss_exceed_days": formats["purple5"],
+             "max_hsdpa_congestion_rate_iub": formats["purple5"],
+             "hsdpa_iub_exceed_days": formats["purple5"],
+             "max_rrc_fail_ac": formats["purple6"],
+             "ac_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_ac_ul": formats["purple6"],
+             "ac_ul_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_ac_dl": formats["purple6"],
+             "ac_dl_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_code": formats["purple6"],
+             "code_fail_exceed_days": formats["purple6"],
+             "max_rrc_fail_bts": formats["yellow"],
+             "bts_fail_exceed_days": formats["yellow"],
+             "tx_congestion_comments": formats["green"],
+             "operational_comments": formats["green"],
+             "fails_comments": formats["green"],
+             "final_comments": formats["green"],
+         }
+
+     elif format_type == "invunit":
+         return {
+             "code": formats["blue"],
+             "FBBA": formats["blue_light"],
+             "FBBC": formats["blue_light"],
+             "FSMF": formats["blue_light"],
+             "ABIA": formats["blue_light"],
+             "total_number_of_subunit": formats["blue_light"],
+             "AHDA": formats["beurre"],
+             "AHEGB": formats["beurre"],
+             "AHEGC": formats["beurre"],
+             "AHEGHA": formats["beurre"],
+             "AHGA": formats["beurre"],
+             "AHMA": formats["beurre"],
+             "AHPMDA": formats["beurre"],
+             "AHPMDG": formats["beurre"],
+             "AHPMDI": formats["beurre"],
+             "ARDA": formats["beurre"],
+             "AREA": formats["beurre"],
+             "ARGA": formats["beurre"],
+             "ARMA": formats["beurre"],
+             "AZNA": formats["beurre"],
+             "FHDB": formats["beurre"],
+             "FHEB": formats["beurre"],
+             "FHEL": formats["beurre"],
+             "FRGU": formats["beurre"],
+             "FRGY": formats["beurre"],
+             "FRMB": formats["beurre"],
+             "FRMF": formats["beurre"],
+             "FXDB": formats["beurre"],
+             "FXED": formats["beurre"],
+             "FZNI": formats["beurre"],
+         }
+
+     else:
+         return {}  # No formatting if format_type not matched
+
298
+
299
+ def _apply_custom_formatting(
300
+ writer, df: pd.DataFrame, sheet_name: str, format_type: str
301
+ ):
302
+ workbook = writer.book
303
+ worksheet = writer.sheets[sheet_name]
304
+
305
+ formats = get_formats(workbook)
306
+ format_map = get_format_map_by_format_type(formats, format_type)
307
+
308
+ # Re-write each recognised header cell with its colour format; the +1 column
+ # offset accounts for the index column written by DataFrame.to_excel
+ for col_idx, col_name in enumerate(df.columns):
309
+ fmt = format_map.get(col_name)
310
+ if fmt:
311
+ worksheet.write(0, col_idx + 1, col_name, fmt)
312
+
313
+
314
+ def _write_to_excel(
315
+ dfs: list[pd.DataFrame], sheet_names: list[str], index=True, format_type: str | None = None
316
+ ) -> bytes:
317
+ bytes_io = io.BytesIO()
318
+ with pd.ExcelWriter(bytes_io, engine="xlsxwriter") as writer:
319
+ for df, name in zip(dfs, sheet_names):
320
+ # df.index.name = "index"
321
+ df.to_excel(writer, sheet_name=name, index=index)
322
+ if format_type:
323
+ _apply_custom_formatting(writer, df, name, format_type)
324
+ return bytes_io.getvalue()
325
+
326
+
327
+ @st.cache_data
328
+ def convert_dfs(dfs: list[pd.DataFrame], sheet_names: list[str]) -> bytes:
329
+ return _write_to_excel(dfs, sheet_names, index=True)
330
+
331
+
332
+ @st.cache_data
333
+ def convert_gsm_dfs(dfs, sheet_names) -> bytes:
334
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="GSM_Analysis")
335
+
336
+
337
+ @st.cache_data
338
+ def convert_lte_analysis_dfs(dfs, sheet_names) -> bytes:
339
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="LTE_Analysis")
340
+
341
+
342
+ @st.cache_data
343
+ def convert_wcel_capacity_dfs(dfs, sheet_names) -> bytes:
344
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="WCEL_capacity")
345
+
346
+
347
+ @st.cache_data
348
+ def convert_database_dfs(dfs, sheet_names) -> bytes:
349
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="database")
350
+
351
+
352
+ @st.cache_data
353
+ def convert_invunit_dfs(dfs, sheet_names) -> bytes:
354
+ return _write_to_excel(dfs, sheet_names, index=True, format_type="invunit")
355
+
356
+
357
+ def save_dataframe(df: pd.DataFrame, sheet_name: str):
358
+ """
359
+ Save the dataframe to a csv file.
360
+
361
+ Args:
362
+ df (pd.DataFrame): The dataframe to save.
363
+ sheet_name (str): The name of the sheet.
364
+ """
365
+ df.to_csv(f"data2/{sheet_name}_{time.time()}.csv", index=False, encoding="latin1")
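A minimal usage sketch of the converter family above (the DataFrames and the output path are illustrative, not part of the module; assumes the xlsxwriter engine from requirements.txt and a Streamlit context for the @st.cache_data wrappers):

import pandas as pd

summary_df = pd.DataFrame({"code": [101], "Final comment": ["OK"]})  # hypothetical sheet
daily_df = pd.DataFrame({"code": [101], "2024-01-01": [99.5]})       # hypothetical sheet

excel_bytes = convert_gsm_dfs([summary_df, daily_df], ["Summary", "Daily"])
with open("gsm_analysis.xlsx", "wb") as f:
    f.write(excel_bytes)

Headers whose names appear in the "GSM_Analysis" format map are re-coloured; every other column keeps the default header style.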
utils/extract_code.py ADDED
@@ -0,0 +1,34 @@
1
+ def extract_code_from_mrbts(mrbts):
2
+ """
3
+ Extract the site code from an MRBTS (Mobile Radio Base Transceiver Station) identifier.
4
+
5
+ Args:
6
+ mrbts (int or str): The MRBTS string to extract the code from.
7
+
8
+ Returns:
9
+ int: The extracted code from the MRBTS string.
10
+
11
+ Raises:
12
+ None.
13
+
14
+ Notes:
15
+ This function handles MRBTS strings that start with '10' and have a length greater than 5,
16
+ as well as MRBTS strings that start with '1', '2', or '3'. For MRBTS strings that do not
17
+ meet these criteria, the entire MRBTS string is returned as an integer.
18
+ """
19
+ str_mrbts = str(mrbts)
20
+
21
+ if len(str_mrbts) > 5 and str_mrbts.startswith("10"):
22
+ # For MRBTS starting with '10' and having length greater than 5
23
+ return int(str_mrbts[2:])
24
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("1"):
25
+ return int(str_mrbts[1:])
26
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("2"):
27
+ # For MRBTS starting with '2' (like 20000 + code)
28
+ return int(str_mrbts[1:])
29
+ elif len(str_mrbts) > 4 and str_mrbts.startswith("3"):
30
+ # For MRBTS starting with '3' (like 30000 + code)
31
+ return int(str_mrbts[1:])
32
+ else:
33
+ # Default case
34
+ return int(str_mrbts)
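A few worked examples of the prefix stripping (values are illustrative):

extract_code_from_mrbts(1012345)   # -> 12345 ('10' prefix dropped, length > 5)
extract_code_from_mrbts("112345")  # -> 12345 (leading '1' dropped, length > 4)
extract_code_from_mrbts(212345)    # -> 12345 (leading '2' dropped)
extract_code_from_mrbts(9876)      # -> 9876  (too short, returned unchanged)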
utils/kml_creator.py ADDED
@@ -0,0 +1,79 @@
1
+ import io
2
+ import math
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import simplekml
7
+
8
+
9
+ def create_sector(kml: simplekml.Kml, row, arc_angle=65):
10
+ """Create a sector shape for the telecom antenna in KML with sector details."""
11
+ code, name, azimuth, lon, lat, size, color = (
12
+ row["code"],
13
+ row["name"],
14
+ row["Azimut"],
15
+ row["Longitude"],
16
+ row["Latitude"],
17
+ row["size"],
18
+ row["color"],
19
+ )
20
+
21
+ num_points = 20 # Number of points for smooth arc
22
+ start_angle = azimuth - (arc_angle / 2)
23
+ end_angle = azimuth + (arc_angle / 2)
24
+
25
+ coords = [(lon, lat)] # Start with the site location (center point)
26
+
27
+ # Generate points for the sector arc
28
+ for angle in np.linspace(start_angle, end_angle, num_points):
29
+ angle_rad = math.radians(angle)
30
+ arc_lon = lon + (size / 111320) * math.sin(angle_rad)
31
+ arc_lat = lat + (size / 111320) * math.cos(angle_rad)
32
+ coords.append((arc_lon, arc_lat))
33
+
34
+ coords.append((lon, lat)) # Close the polygon
35
+
36
+ # Create the sector polygon
37
+ pol = kml.newpolygon(name=name, outerboundaryis=coords)
38
+
39
+ # Dynamically create the description from all DataFrame columns
40
+ description = "<b>Sector Details:</b><br>"
41
+ for column, value in row.items():
42
+ description += f"<b>{column}:</b> {value}<br>"
43
+
44
+ pol.description = description
45
+ pol.style.polystyle.color = color # Set color from DataFrame
46
+ pol.style.polystyle.outline = 1 # Outline enabled
47
+ pol.style.linestyle.color = "ff000000" # Black outline
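The arc offsets above use a flat-earth approximation: 111,320 m per degree on both axes. One degree of longitude actually spans about 111320 * cos(latitude) metres, so sectors drawn far from the equator appear slightly stretched east-west. A latitude-corrected variant would look like this (a suggestion, not what the module does):

# Hypothetical correction: shrink the east-west offset by cos(latitude)
arc_lon = lon + (size / (111320 * math.cos(math.radians(lat)))) * math.sin(angle_rad)
arc_lat = lat + (size / 111320) * math.cos(angle_rad)

At the low latitudes of most deployments the difference is small, which is presumably why the simpler form is used.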
48
+
49
+
50
+ def generate_kml_from_df(df: pd.DataFrame):
51
+ """Generate a KML file from a Pandas DataFrame for telecom sectors."""
52
+ kml = simplekml.Kml()
53
+ site_added = set() # Keep track of sites already added to avoid duplicates
54
+
55
+ # Sort the DataFrame to ensure 900 MHz (smaller) is drawn last (on top)
56
+ df_sorted = df.sort_values(
57
+ by="size", ascending=False
58
+ ) # Larger first, smaller on top
59
+
60
+ for _, row in df_sorted.iterrows():
61
+ code, lon, lat = row["code"], row["Longitude"], row["Latitude"]
62
+
63
+ # Add site name as a point only once
64
+ if code not in site_added:
65
+ pnt = kml.newpoint(name=code, coords=[(lon, lat)])
66
+ pnt.style.iconstyle.icon.href = (
67
+ "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
68
+ )
69
+ pnt.style.labelstyle.scale = 1.2 # Adjust label size
70
+ pnt.description = f"Site: {code}<br>Location: {lat}, {lon}"
71
+ site_added.add(code)
72
+
73
+ create_sector(kml, row)
74
+
75
+ kml_data = io.BytesIO()
76
+ kml_str = kml.kml() # Get KML as string
77
+ kml_data.write(kml_str.encode("utf-8")) # Write KML to BytesIO
78
+ kml_data.seek(0) # Move to beginning of BytesIO
79
+ return kml_data
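A usage sketch with the columns the two functions expect; the 'size' and 'color' values typically come from UtilsVars.size_mapping and UtilsVars.color_mapping, and the row below is made up:

df = pd.DataFrame([{
    "code": "S1", "name": "S1_G900_1", "Azimut": 120,
    "Longitude": -8.0, "Latitude": 12.65,
    "size": 100, "color": "7fff0000",
}])
kml_io = generate_kml_from_df(df)
with open("sectors.kml", "wb") as f:
    f.write(kml_io.read())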
utils/kpi_analysis_utils.py ADDED
@@ -0,0 +1,666 @@
1
+ import re
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
+ class GsmAnalysis:
8
+ hf_rate_coef = {
9
+ 10: 1.1,
10
+ 20: 1.2,
11
+ 40: 1.4,
12
+ 60: 1.6,
13
+ 70: 1.7,
14
+ 80: 1.8,
15
+ 99: 2.0,
16
+ 100: 1.4,
17
+ }
18
+ erlangB_table = {
19
+ 1: 0.0204,
20
+ 2: 0.2234,
21
+ 3: 0.6022,
22
+ 4: 1.092,
23
+ 5: 1.657,
24
+ 6: 2.276,
25
+ 7: 2.935,
26
+ 8: 3.627,
27
+ 9: 4.345,
28
+ 10: 5.084,
29
+ 11: 5.841,
30
+ 12: 6.614,
31
+ 13: 7.401,
32
+ 14: 8.2,
33
+ 15: 9.009,
34
+ 16: 9.828,
35
+ 17: 10.66,
36
+ 18: 11.49,
37
+ 19: 12.33,
38
+ 20: 13.18,
39
+ 21: 14.04,
40
+ 22: 14.9,
41
+ 23: 15.76,
42
+ 24: 16.63,
43
+ 25: 17.5,
44
+ 26: 18.38,
45
+ 27: 19.26,
46
+ 28: 20.15,
47
+ 29: 21.04,
48
+ 30: 21.93,
49
+ 31: 22.83,
50
+ 32: 23.72,
51
+ 33: 24.63,
52
+ 34: 25.53,
53
+ 35: 26.43,
54
+ 36: 27.34,
55
+ 37: 28.25,
56
+ 38: 29.17,
57
+ 39: 30.08,
58
+ 40: 31,
59
+ 41: 31.91,
60
+ 42: 32.84,
61
+ 43: 33.76,
62
+ 44: 34.68,
63
+ 45: 35.61,
64
+ 46: 36.53,
65
+ 47: 37.46,
66
+ 48: 38.39,
67
+ 49: 39.32,
68
+ 50: 40.25,
69
+ 51: 41.19,
70
+ 52: 42.12,
71
+ 53: 43.06,
72
+ 54: 44,
73
+ 55: 44.93,
74
+ 56: 45.88,
75
+ 57: 46.81,
76
+ 58: 47.75,
77
+ 59: 48.7,
78
+ 60: 49.64,
79
+ 61: 50.59,
80
+ 62: 51.53,
81
+ 63: 52.48,
82
+ 64: 53.43,
83
+ 65: 54.38,
84
+ 66: 55.32,
85
+ 67: 56.27,
86
+ 68: 57.22,
87
+ 69: 58.18,
88
+ 70: 59.13,
89
+ 71: 60.08,
90
+ 72: 61.04,
91
+ 73: 61.99,
92
+ 74: 62.94,
93
+ 75: 63.9,
94
+ 76: 64.86,
95
+ 77: 65.81,
96
+ 78: 66.77,
97
+ 79: 67.73,
98
+ 80: 68.69,
99
+ 81: 69.64,
100
+ 82: 70.61,
101
+ 83: 71.57,
102
+ 84: 72.53,
103
+ 85: 73.49,
104
+ 86: 74.45,
105
+ 87: 75.41,
106
+ 88: 76.38,
107
+ 89: 77.34,
108
+ 90: 78.3,
109
+ 91: 79.27,
110
+ 92: 80.23,
111
+ 93: 81.2,
112
+ 94: 82.16,
113
+ 95: 83.13,
114
+ 96: 84.09,
115
+ 97: 85.06,
116
+ 98: 86.03,
117
+ 99: 87,
118
+ 100: 87.97,
119
+ 101: 88.94,
120
+ 102: 89.91,
121
+ 103: 90.88,
122
+ 104: 91.85,
123
+ 105: 92.82,
124
+ 106: 93.79,
125
+ 107: 94.76,
126
+ 108: 95.73,
127
+ 109: 96.71,
128
+ 110: 97.68,
129
+ 111: 98.65,
130
+ 112: 99.63,
131
+ 113: 100.6,
132
+ 114: 101.57,
133
+ 115: 102.54,
134
+ 116: 103.52,
135
+ 117: 104.49,
136
+ 118: 105.47,
137
+ 119: 106.44,
138
+ 120: 107.42,
139
+ 121: 108.4,
140
+ 122: 109.37,
141
+ 123: 110.35,
142
+ 124: 111.32,
143
+ 125: 112.3,
144
+ 126: 113.28,
145
+ 127: 114.25,
146
+ 128: 115.23,
147
+ 129: 116.21,
148
+ 130: 117.19,
149
+ 131: 118.17,
150
+ 132: 119.15,
151
+ 133: 120.12,
152
+ 134: 121.1,
153
+ 135: 122.08,
154
+ 136: 123.07,
155
+ 137: 124.04,
156
+ 138: 125.02,
157
+ 139: 126.01341,
158
+ 140: 127.00918,
159
+ 141: 127.96752,
160
+ 142: 128.98152,
161
+ 143: 129.92152,
162
+ 144: 130.88534,
163
+ 145: 131.96461,
164
+ 146: 132.89897,
165
+ 147: 133.86373,
166
+ 148: 134.82569,
167
+ 149: 135.76295,
168
+ 150: 136.82988,
169
+ 151: 137.79,
170
+ 152: 138.77,
171
+ 153: 139.75,
172
+ 154: 140.74,
173
+ 155: 141.72,
174
+ 156: 142.7,
175
+ 157: 143.69,
176
+ 158: 144.67,
177
+ 159: 145.66,
178
+ 160: 146.64,
179
+ 161: 147.63,
180
+ 162: 148.61,
181
+ 163: 149.6,
182
+ 164: 150.58,
183
+ 165: 151.57,
184
+ 166: 152.55,
185
+ 167: 153.54,
186
+ 168: 154.53,
187
+ 169: 155.51,
188
+ 170: 156.5,
189
+ 171: 157.48,
190
+ 172: 158.47,
191
+ 173: 159.46,
192
+ 174: 160.44,
193
+ 175: 161.43,
194
+ 176: 162.42,
195
+ 177: 163.41,
196
+ 178: 164.39,
197
+ 179: 165.38,
198
+ 180: 166.37,
199
+ 181: 167.36,
200
+ 182: 168.35,
201
+ 183: 169.33,
202
+ 184: 170.32,
203
+ 185: 171.31,
204
+ 186: 172.3,
205
+ 187: 173.29,
206
+ 188: 174.28,
207
+ 189: 175.27,
208
+ 190: 176.26,
209
+ 191: 177.25,
210
+ 192: 178.24,
211
+ 193: 179.23,
212
+ 194: 180.22,
213
+ 195: 181.21,
214
+ 196: 182.2,
215
+ 197: 183.19,
216
+ 198: 184.18,
217
+ 199: 185.17,
218
+ 200: 186.16,
219
+ }
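erlangB_table maps a TCH channel count (1 to 200) to the maximum offered traffic in Erlangs, at what appears to be a 2% blocking probability. A small hypothetical lookup helper with clamping (not part of the class):

def erlang_capacity(channels: int) -> float:
    # Clamp to the 1..200 channel range covered by the table above
    table = GsmAnalysis.erlangB_table
    return table[max(1, min(200, int(channels)))]

erlang_capacity(8)    # -> 3.627 Erlangs
erlang_capacity(250)  # -> 186.16 (clamped to 200 channels)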
220
+
221
+
222
+ class GsmCapacity:
223
+ final_results = None
224
+ operational_neighbours_df = None
225
+ final_comment_mapping = {
226
+ "Availability and TX issues": "Operational issues with no congestion",
227
+ "Availability issues": "Operational issues with no congestion",
228
+ "TX issues": "Operational issues with no congestion",
229
+ "Operational is OK": "Operational is OK with no congestion",
230
+ "Tch utilization exceeded threshold, Availability and TX issues": "High utilization with Operational issues",
231
+ "Tch utilization exceeded threshold, Availability issues": "High utilization with Operational issues",
232
+ "Tch utilization exceeded threshold, TX issues": "High utilization with Operational issues",
233
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
234
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
235
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "High Utilization with Congestion without Operational issues",
236
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High Utilization with Congestion without Operational issues",
237
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
238
+ "Tch utilization exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
239
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
240
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
241
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "High utilization with Congestion and operational issues",
242
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "High utilization with Congestion and operational issues",
243
+ "Tch utilization exceeded threshold, TCH blocking exceeded threshold, TX issues": "High utilization with Congestion and operational issues",
244
+ "Down Site": "Down Cell",
245
+ "SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
246
+ "TCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
247
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Operational is OK": "Congestion without Operational issues",
248
+ "Tch utilization exceeded threshold, Operational is OK": "High utilization without Congestion and Operational issues",
249
+ "SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
250
+ "SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
251
+ "SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
252
+ "TCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
253
+ "TCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
254
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability and TX issues": "Congestion with Operational issues",
255
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, Availability issues": "Congestion with Operational issues",
256
+ "TCH blocking exceeded threshold, SDCCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
257
+ "TCH blocking exceeded threshold, TX issues": "Congestion with Operational issues",
258
+ }
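The mapping collapses the many comment permutations produced by the analysis functions into a handful of summary buckets, for example:

GsmCapacity.final_comment_mapping[
    "TCH blocking exceeded threshold, TX issues"
]  # -> "Congestion with Operational issues"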
259
+
260
+
261
+ def combine_comments(df: pd.DataFrame, *columns: str, new_column: str) -> pd.DataFrame:
262
+ """
263
+ Combine comments from multiple columns into one column.
264
+
265
+ Args:
266
+ df: DataFrame containing comment columns
267
+ *columns: Variable number of column names containing comments
268
+ new_column: Name for the new combined comments column
269
+
270
+ Returns:
271
+ DataFrame with a new column containing combined comments
272
+ """
273
+ result_df = df.copy()
274
+ result_df[new_column] = result_df[list(columns)].apply(
275
+ lambda row: ", ".join([str(x) for x in row if x]), axis=1
276
+ )
277
+ # Trim all trailing commas
278
+ result_df[new_column] = result_df[new_column].str.replace(
279
+ r"^[,\s]+|[,\s]+$", "", regex=True
280
+ )
281
+ # Replace multiple commas with a single comma
282
+ result_df[new_column] = result_df[new_column].str.replace(
283
+ r",\s*,", ", ", regex=True
284
+ )
285
+ return result_df
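Illustrative behaviour (the column names are made up):

df = pd.DataFrame({
    "a": ["TCH blocking exceeded threshold", None],
    "b": [None, "Availability issues"],
})
combine_comments(df, "a", "b", new_column="final")["final"].tolist()
# -> ['TCH blocking exceeded threshold', 'Availability issues']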
286
+
287
+
288
+ def summarize_fails_comments(comment):
289
+ if not comment or pd.isna(comment) or comment.strip() == "":
290
+ return ""
291
+
292
+ # Extract all `rrc_fail_xxx` fields
293
+ matches = re.findall(r"rrc_fail_([a-z_]+)", comment)
294
+ if not matches:
295
+ return ""
296
+
297
+ # Remove duplicates, sort alphabetically
298
+ unique_sorted = sorted(set(matches))
299
+
300
+ # Combine and add 'fails'
301
+ return ", ".join(unique_sorted) + " fails"
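For example, a combined fails comment reduces to the distinct failure causes:

summarize_fails_comments(
    "rrc_fail_ac exceeded threshold, rrc_fail_code exceeded threshold"
)  # -> "ac, code fails"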
302
+
303
+
304
+ def kpi_naming_cleaning(df: pd.DataFrame) -> pd.DataFrame:
305
+ """
306
+ Clean KPI column names by replacing special characters and standardizing format.
307
+
308
+ Args:
309
+ df: DataFrame with KPI column names to clean
310
+
311
+ Returns:
312
+ DataFrame with cleaned column names
313
+ """
314
+ name_df: pd.DataFrame = df.copy()
315
+ name_df.columns = name_df.columns.str.replace("[ /(),-.']", "_", regex=True)
316
+ name_df.columns = name_df.columns.str.replace("___", "_")
317
+ name_df.columns = name_df.columns.str.replace("__", "_")
318
+ name_df.columns = name_df.columns.str.replace("%", "perc")
319
+ name_df.columns = name_df.columns.str.rstrip("_")
320
+ return name_df
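Note that the character class "[ /(),-.']" relies on ",-." being a contiguous codepoint range (44 to 46), so it matches comma, hyphen and period. An illustrative run:

df = pd.DataFrame(columns=["TCH availability, (%)"])
kpi_naming_cleaning(df).columns.tolist()  # -> ['TCH_availability_perc']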
321
+
322
+
323
+ def create_daily_date(df: pd.DataFrame) -> pd.DataFrame:
324
+ """
325
+ Create a daily date column from PERIOD_START_TIME and drop unnecessary columns.
326
+
327
+ Args:
328
+ df: DataFrame containing PERIOD_START_TIME column
329
+
330
+ Returns:
331
+ DataFrame with new date column and unnecessary columns removed
332
+ """
333
+ date_df: pd.DataFrame = df.copy()
334
+ date_df[["mois", "jour", "annee"]] = date_df["PERIOD_START_TIME"].str.split(
335
+ ".", expand=True
336
+ )
337
+ date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
338
+ # Remove unnecessary columns
339
+ date_df = date_df.drop(["annee", "mois", "jour", "PERIOD_START_TIME"], axis=1)
340
+ return date_df
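The PERIOD_START_TIME values are expected in MM.DD.YYYY form, and the resulting date stays a string:

raw = pd.DataFrame({"PERIOD_START_TIME": ["01.25.2024"], "kpi": [99.1]})
create_daily_date(raw)  # -> columns ['kpi', 'date'], with date == '2024-01-25'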
341
+
342
+
343
+ def create_hourly_date(df: pd.DataFrame) -> pd.DataFrame:
+ """Create hourly 'datetime' and daily 'date' columns from PERIOD_START_TIME."""
+ # Copy so the caller's DataFrame is not mutated in place
+ date_df: pd.DataFrame = df.copy()
345
+ date_df[["date_t", "hour"]] = date_df["PERIOD_START_TIME"].str.split(
346
+ " ", expand=True
347
+ )
348
+ date_df[["mois", "jour", "annee"]] = date_df["date_t"].str.split(".", expand=True)
349
+ date_df["datetime"] = (
350
+ date_df["annee"]
351
+ + "-"
352
+ + date_df["mois"]
353
+ + "-"
354
+ + date_df["jour"]
355
+ + " "
356
+ + date_df["hour"]
357
+ )
358
+
359
+ date_df["date"] = date_df["annee"] + "-" + date_df["mois"] + "-" + date_df["jour"]
360
+
361
+ # Remove columns 'année' and 'mois'
362
+ date_df = date_df.drop(
363
+ ["annee", "mois", "jour", "date_t", "PERIOD_START_TIME"], axis=1
364
+ )
365
+ return date_df
366
+
367
+
368
+ def create_dfs_per_kpi(
369
+ df: pd.DataFrame = None,
370
+ pivot_date_column: str = "date",
371
+ pivot_name_column: str = "BTS_name",
372
+ kpi_columns_from: int = None,
373
+ ) -> dict[str, pd.DataFrame]:
+ """
+ Create one pivoted DataFrame per KPI column.
+
+ Args:
+ df: Long-format DataFrame containing KPI data
+ pivot_date_column: Column used for the pivoted column axis
+ pivot_name_column: Column used for the pivoted row index
+ kpi_columns_from: Positional index of the first KPI column
+
+ Returns:
+ dict mapping each KPI name to its pivoted DataFrame
+ """
382
+ kpi_columns = df.columns[kpi_columns_from:]
383
+
384
+ pivoted_kpi_dfs = {}
385
+
386
+ # Loop through each KPI and create pivoted DataFrames
387
+ for kpi in kpi_columns:
388
+ temp_df = df[[pivot_date_column, pivot_name_column, kpi]].copy()
389
+ # remove duplicates
390
+ temp_df = temp_df.drop_duplicates(
391
+ subset=[pivot_name_column, pivot_date_column], keep="first"
392
+ )
393
+ temp_df = temp_df.reset_index()
394
+ # Pivot the dataframe
395
+ pivot_df = temp_df.pivot(
396
+ index=pivot_name_column, columns=pivot_date_column, values=kpi
397
+ )
398
+ pivot_df.columns = pd.MultiIndex.from_product([[kpi], pivot_df.columns])
399
+ pivot_df.columns.names = ["KPI", "Date"]
400
+
401
+ # Store in dictionary with KPI name as key
402
+ pivoted_kpi_dfs[kpi] = pivot_df
403
+
404
+ return pivoted_kpi_dfs
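Illustrative input and output shape (values made up):

long_df = pd.DataFrame({
    "date": ["2024-01-01", "2024-01-02"],
    "BTS_name": ["CELL_A", "CELL_A"],
    "tch_blocking": [0.5, 1.2],
})
per_kpi = create_dfs_per_kpi(long_df, kpi_columns_from=2)
per_kpi["tch_blocking"]
# index: BTS_name; columns: MultiIndex (KPI='tch_blocking', Date=...)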
405
+
406
+
407
+ def cell_availability_analysis(
408
+ df: pd.DataFrame,
409
+ days: int = 7,
410
+ availability_threshold: int = 95,
411
+ analysis_type: str = "daily",
412
+ ) -> pd.DataFrame:
413
+ """
414
+ Analyze cell availability and categorize sites based on availability metrics.
415
+
416
+ Args:
417
+ df: DataFrame containing cell availability data
+ days: Number of days to analyze
+ availability_threshold: Percentage at or below which a day counts as degraded
+ analysis_type: Suffix used in the generated column names (e.g. "daily")
419
+
420
+ Returns:
421
+ DataFrame with availability analysis and site status comments
422
+ """
423
+ result_df: pd.DataFrame = df.copy().fillna(0)
424
+ last_days_df: pd.DataFrame = result_df.iloc[:, -days:]
425
+ result_df[f"Average_cell_availability_{analysis_type.lower()}"] = last_days_df.mean(
426
+ axis=1
427
+ ).round(2)
428
+
429
+ # Count the number of days above threshold
430
+ result_df[
431
+ f"number_of_days_exceeding_availability_threshold_{analysis_type.lower()}"
432
+ ] = last_days_df.apply(
433
+ lambda row: sum(1 for x in row if x <= availability_threshold), axis=1
434
+ )
435
+
436
+ # Categorize sites based on availability
437
+ def categorize_availability(x: float) -> str:
438
+ if x == 0 or pd.isnull(x):
439
+ return "Down Site"
440
+ elif 0 < x <= 70:
441
+ return "critical instability"
442
+ elif 70 < x <= availability_threshold:
443
+ return "instability"
444
+ else:
445
+ return "Availability OK"
446
+
447
+ result_df[f"availability_comment_{analysis_type.lower()}"] = result_df[
448
+ f"Average_cell_availability_{analysis_type.lower()}"
449
+ ].apply(categorize_availability)
450
+
451
+ return result_df
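For instance, with two daily columns and the default 95% threshold:

avail = pd.DataFrame({"2024-01-01": [99.0], "2024-01-02": [90.0]}, index=["CELL_A"])
out = cell_availability_analysis(avail, days=2)
# Average_cell_availability_daily == 94.5 -> comment 'instability',
# number_of_days_exceeding_availability_threshold_daily == 1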
452
+
453
+
454
+ def analyze_tch_abis_fails(
455
+ df: pd.DataFrame,
456
+ number_of_kpi_days: int,
457
+ analysis_type: str,
458
+ number_of_threshold_days: int,
459
+ tch_abis_fails_threshold: int,
460
+ ) -> pd.DataFrame:
461
+
462
+ result_df: pd.DataFrame = df.copy()
463
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
464
+ # last_days_df = last_days_df.fillna(0)
465
+
466
+ result_df[f"avg_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.mean(
467
+ axis=1
468
+ ).round(2)
469
+ result_df[f"max_tch_abis_fail_{analysis_type.lower()}"] = last_days_df.max(axis=1)
470
+ # Count the number of days above threshold
471
+ result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"] = (
472
+ last_days_df.apply(
473
+ lambda row: sum(1 for x in row if x >= tch_abis_fails_threshold), axis=1
474
+ )
475
+ )
476
+
477
+ # Flag "tch abis fail exceeded threshold" once the exceed-count reaches number_of_threshold_days
478
+ result_df[f"tch_abis_fail_{analysis_type.lower()}_comment"] = np.where(
479
+ result_df[f"number_of_days_with_tch_abis_fail_exceeded_{analysis_type.lower()}"]
480
+ >= number_of_threshold_days,
481
+ "tch abis fail exceeded threshold",
482
+ None,
483
+ )
484
+
485
+ return result_df
486
+
487
+
488
+ def analyze_tch_call_blocking(
489
+ df: pd.DataFrame,
490
+ number_of_kpi_days: int,
491
+ analysis_type: str,
492
+ number_of_threshold_days: int,
493
+ tch_blocking_threshold: int,
494
+ ) -> pd.DataFrame:
495
+
496
+ result_df = df.copy()
497
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
498
+ # last_days_df = last_days_df.fillna(0)
499
+
500
+ result_df[f"avg_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.mean(
501
+ axis=1
502
+ ).round(2)
503
+ result_df[f"max_tch_call_blocking_{analysis_type.lower()}"] = last_days_df.max(
504
+ axis=1
505
+ )
506
+ # Count the number of days above threshold
507
+ result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"] = (
508
+ last_days_df.apply(
509
+ lambda row: sum(1 for x in row if x >= tch_blocking_threshold), axis=1
510
+ )
511
+ )
512
+
513
+ # Flag "TCH blocking exceeded threshold" once the exceed-count reaches number_of_threshold_days
514
+ result_df[f"tch_call_blocking_{analysis_type.lower()}_comment"] = np.where(
515
+ result_df[f"number_of_days_with_tch_blocking_exceeded_{analysis_type.lower()}"]
516
+ >= number_of_threshold_days,
517
+ "TCH blocking exceeded threshold",
518
+ None,
519
+ )
520
+ return result_df
521
+
522
+
523
+ def analyze_sdcch_call_blocking(
524
+ df: pd.DataFrame,
525
+ number_of_kpi_days: int,
526
+ sdcch_blocking_threshold: int,
527
+ analysis_type: str,
528
+ number_of_threshold_days: int,
529
+ ) -> pd.DataFrame:
530
+
531
+ result_df = df.copy()
532
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
533
+ # last_days_df = last_days_df.fillna(0)
534
+
535
+ result_df[f"avg_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.mean(
536
+ axis=1
537
+ ).round(2)
538
+ result_df[f"max_sdcch_real_blocking_{analysis_type.lower()}"] = last_days_df.max(
539
+ axis=1
540
+ )
541
+ # Count the number of days above threshold
542
+ result_df[
543
+ f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
544
+ ] = last_days_df.apply(
545
+ lambda row: sum(1 for x in row if x >= sdcch_blocking_threshold), axis=1
546
+ )
547
+
548
+ # Flag "SDCCH blocking exceeded threshold" once the exceed-count reaches number_of_threshold_days
549
+ result_df[f"sdcch_real_blocking_{analysis_type.lower()}_comment"] = np.where(
550
+ result_df[
551
+ f"number_of_days_with_sdcch_blocking_exceeded_{analysis_type.lower()}"
552
+ ]
553
+ >= number_of_threshold_days,
554
+ "SDCCH blocking exceeded threshold",
555
+ None,
556
+ )
557
+
558
+ return result_df
559
+
560
+
561
+ class LteCapacity:
562
+ final_results = None
563
+ # Next band mapping
564
+ next_band_mapping = {
565
+ "L1800": "L800",
566
+ "L800": "L1800",
567
+ "L1800/L800": "L2600",
568
+ "L1800/L2300/L800": "L2600",
569
+ "L2300/L800": "L2600",
570
+ "L1800/L2600/L800": "New site/Dual Beam",
571
+ "L1800/L2300/L2600/L800": "New site/Dual Beam",
572
+ "L2300": "FDD H// colocated site",
573
+ }
574
+
575
+
576
+ def analyze_prb_usage(
577
+ df: pd.DataFrame,
578
+ number_of_kpi_days: int,
579
+ prb_usage_threshold: int,
580
+ analysis_type: str,
581
+ number_of_threshold_days: int,
582
+ suffix: str = "",
583
+ ) -> pd.DataFrame:
584
+ result_df = df.copy()
585
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
586
+ # last_days_df = last_days_df.fillna(0)
587
+
588
+ result_df[f"avg_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.mean(
589
+ axis=1
590
+ ).round(2)
591
+ result_df[f"max_prb_usage_{analysis_type.lower()}{suffix}"] = last_days_df.max(
592
+ axis=1
593
+ )
594
+ # Count the number of days above threshold
595
+ result_df[
596
+ f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
597
+ ] = last_days_df.apply(
598
+ lambda row: sum(1 for x in row if x >= prb_usage_threshold), axis=1
599
+ )
600
+
601
+ # Flag "PRB usage exceeded threshold" once the exceed-count reaches number_of_threshold_days
602
+ result_df[f"prb_usage_{analysis_type.lower()}{suffix}_comment"] = np.where(
603
+ result_df[
604
+ f"number_of_days_with_prb_usage_exceeded_{analysis_type.lower()}{suffix}"
605
+ ]
606
+ >= number_of_threshold_days,
607
+ "PRB usage exceeded threshold",
608
+ None,
609
+ )
610
+ return result_df
611
+
612
+
613
+ def analyze_fails_kpi(
614
+ df: pd.DataFrame,
615
+ number_of_kpi_days: int,
616
+ number_of_threshold_days: int,
617
+ kpi_threshold: int,
618
+ kpi_column_name: str,
619
+ ) -> pd.DataFrame:
620
+ result_df: pd.DataFrame = df.copy()
621
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
622
+ # last_days_df = last_days_df.fillna(0)
623
+
624
+ result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
625
+ result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)
626
+ # Count the number of days above threshold
627
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = last_days_df.apply(
628
+ lambda row: sum(1 for x in row if x >= kpi_threshold), axis=1
629
+ )
630
+
631
+ # Flag "{kpi_column_name} exceeded threshold" once the exceed-count reaches number_of_threshold_days
632
+ result_df[f"{kpi_column_name}_comment"] = np.where(
633
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"]
634
+ >= number_of_threshold_days,
635
+ f"{kpi_column_name} exceeded threshold",
636
+ None,
637
+ )
638
+ return result_df
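This parameterised analyzer follows the same avg/max/exceed-count pattern as the TCH, SDCCH and PRB functions above. An illustrative run flagging cells whose rrc_fail_ac exceeded 50 on at least 2 of the last 7 days:

fails = pd.DataFrame([[10, 60, 70, 0, 80, 5, 90]],
                     index=["CELL_A"],
                     columns=[f"2024-01-0{i}" for i in range(1, 8)])
out = analyze_fails_kpi(fails, number_of_kpi_days=7, number_of_threshold_days=2,
                        kpi_threshold=50, kpi_column_name="rrc_fail_ac")
# -> avg_rrc_fail_ac == 45.0, max_rrc_fail_ac == 90, 4 exceed days,
#    comment "rrc_fail_ac exceeded threshold"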
639
+
640
+
641
+ def analyze_lcg_utilization(
642
+ df: pd.DataFrame,
643
+ number_of_kpi_days: int,
644
+ number_of_threshold_days: int,
645
+ kpi_threshold: int,
646
+ kpi_column_name: str,
647
+ ) -> pd.DataFrame:
648
+ result_df: pd.DataFrame = df.copy()
649
+ last_days_df: pd.DataFrame = result_df.iloc[:, -number_of_kpi_days:]
650
+ # last_days_df = last_days_df.fillna(0)
651
+
652
+ result_df[f"avg_{kpi_column_name}"] = last_days_df.mean(axis=1).round(2)
653
+ result_df[f"max_{kpi_column_name}"] = last_days_df.max(axis=1)
654
+ # Count the number of days above threshold
655
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"] = last_days_df.apply(
656
+ lambda row: sum(1 for x in row if x >= kpi_threshold), axis=1
657
+ )
658
+
659
+ # Flag "{kpi_column_name} exceeded threshold" once the exceed-count reaches number_of_threshold_days
660
+ result_df[f"{kpi_column_name}_comment"] = np.where(
661
+ result_df[f"number_of_days_with_{kpi_column_name}_exceeded"]
662
+ >= number_of_threshold_days,
663
+ f"{kpi_column_name} exceeded threshold",
664
+ None,
665
+ )
666
+ return result_df
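analyze_lcg_utilization is line-for-line identical to analyze_fails_kpi. If the duplication is not intentional, a thin delegation would keep a single code path (a suggestion only, not applied here):

# Hypothetical consolidation: delegate instead of duplicating the body
def analyze_lcg_utilization(df, number_of_kpi_days, number_of_threshold_days,
                            kpi_threshold, kpi_column_name):
    return analyze_fails_kpi(df, number_of_kpi_days, number_of_threshold_days,
                             kpi_threshold, kpi_column_name)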
utils/rnc_bsc_lac_count_chart.py ADDED
@@ -0,0 +1,89 @@
1
+ import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
4
+ from plotly.subplots import make_subplots
5
+
6
+
7
+ # Reusable function to create subplots
8
+ def create_lac_count_per_controller_subplots(
9
+ df: pd.DataFrame,
10
+ controller_column: str,
11
+ lac_column: str,
12
+ count_column: str,
13
+ fig_title: str,
14
+ ):
15
+ # Get unique controller_IDs
16
+ unique_controllers = df[controller_column].unique()
17
+
18
+ # Calculate the number of rows needed (4 subplots per row)
19
+ rows_needed = (len(unique_controllers) + 3) // 4 # Round up to ensure enough rows
20
+
21
+ # Create subplot structure with a dynamic number of rows and 4 columns per row
22
+ fig = make_subplots(
23
+ rows=rows_needed,
24
+ cols=4,
25
+ shared_xaxes=False,
26
+ subplot_titles=unique_controllers,
27
+ )
28
+
29
+ # Add a counter for positioning the subplots
30
+ subplot_position = 1
31
+
32
+ # Iterate over each controller_ID
33
+ for controller in unique_controllers:
34
+ # Filter data for each controller_ID (create a small dataframe per controller_ID)
35
+ controller_data = df[df[controller_column] == controller]
36
+
37
+ # Determine the row and column for the current subplot
38
+ row = (subplot_position - 1) // 4 + 1
39
+ col = (subplot_position - 1) % 4 + 1
40
+
41
+ # Add bar chart to the subplot
42
+ fig.add_trace(
43
+ go.Bar(
44
+ x=controller_data[lac_column],
45
+ y=controller_data[count_column],
46
+ name=controller,
47
+ text=controller_data[count_column],
48
+ ),
49
+ row=row,
50
+ col=col,
51
+ )
52
+
53
+ # Move to the next subplot position
54
+ subplot_position += 1
55
+
56
+ # Update layout to make it more readable and fit all subplots
57
+ fig.update_layout(
58
+ height=300 * rows_needed,
59
+ title_text=fig_title,
60
+ showlegend=False,
61
+ )
62
+
63
+ # Show the plot
64
+ # fig.show()
65
+
66
+ return fig
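The placement arithmetic packs four subplots per row. For example, with 6 controllers:

# rows_needed = (6 + 3) // 4 == 2
# positions 1-4 -> row 1, cols 1-4; positions 5-6 -> row 2, cols 1-2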
67
+
68
+
69
+ def create_bar_chart(df: pd.DataFrame, title: str = "Chart Title") -> px.bar:
70
+ """
71
+ Create a bar chart using Plotly Express with the first column as x and the second column as y.
72
+
73
+ Args:
74
+ df (pd.DataFrame): Input DataFrame
75
+
76
+ Returns:
77
+ fig (px.bar): Bar chart figure
78
+ """
79
+ fig = px.bar(
80
+ df,
81
+ x=df.columns[0],
82
+ y=df.columns[1],
83
+ text_auto=True,
84
+ title=title,
85
+ height=300,
86
+ width=600,
87
+ )
88
+ fig.update_xaxes(tickvals=df[df.columns[0]].unique())
89
+ return fig
utils/utils_functions.py ADDED
@@ -0,0 +1,126 @@
1
+ import warnings
2
+
3
+ import pandas as pd
4
+ from geopy.distance import geodesic
5
+
6
+ # Function to calculate distances while preserving all original columns
7
+ # def calculate_distances(
8
+ # df1: pd.DataFrame,
9
+ # df2: pd.DataFrame,
10
+ # code_col1,
11
+ # lat_col1,
12
+ # long_col1,
13
+ # code_col2,
14
+ # lat_col2,
15
+ # long_col2,
16
+ # min_distance: int = 1,
17
+ # ):
18
+ # distances = []
19
+
20
+ # for _, row1 in df1.iterrows():
21
+ # for _, row2 in df2.iterrows():
22
+ # coord1 = (row1[lat_col1], row1[long_col1])
23
+ # coord2 = (row2[lat_col2], row2[long_col2])
24
+ # distance_km = geodesic(coord1, coord2).kilometers # Compute distance
25
+
26
+ # # Combine all original columns + distance
27
+ # combined_row = {
28
+ # **row1.to_dict(), # Keep all columns from Dataset1
29
+ # **{
30
+ # f"{col}_Dataset2": row2[col] for col in df2.columns
31
+ # }, # Keep all columns from Dataset2
32
+ # "Distance_km": distance_km,
33
+ # }
34
+ # distances.append(combined_row)
35
+
36
+ # df_distances = pd.DataFrame(distances)
37
+
38
+ # # Find the closest point for each Point1
39
+ # df_closest: pd.DataFrame = df_distances.loc[
40
+ # df_distances.groupby(code_col1)["Distance_km"].idxmin()
41
+ # ]
42
+
43
+ # # Find the distances below min_distance
44
+ # df_closest_min_distance = df_distances[df_distances["Distance_km"] < min_distance]
45
+
46
+ # return df_distances, df_closest, df_closest_min_distance
47
+
48
+
49
+ def calculate_distances(
50
+ df1: pd.DataFrame,
51
+ df2: pd.DataFrame,
52
+ code_col1: str,
53
+ lat_col1: str,
54
+ long_col1: str,
55
+ code_col2: str,
56
+ lat_col2: str,
57
+ long_col2: str,
58
+ min_distance: float = 1.0,
59
+ ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
60
+ """
61
+ Calculate distances between points in two datasets and find closest matches.
62
+
63
+ Args:
64
+ df1: First DataFrame containing reference points
65
+ df2: Second DataFrame containing points to compare
66
+ code_col1: Column name in df1 containing point identifiers
67
+ lat_col1: Column name in df1 containing latitude
68
+ long_col1: Column name in df1 containing longitude
69
+ code_col2: Column name in df2 containing point identifiers
70
+ lat_col2: Column name in df2 containing latitude
71
+ long_col2: Column name in df2 containing longitude
72
+ min_distance: Minimum distance threshold in kilometers
73
+
74
+ Returns:
75
+ tuple: (all_distances, closest_matches, matches_below_threshold)
76
+ """
77
+ # Validate input columns
78
+ required_cols_1 = {code_col1, lat_col1, long_col1}
79
+ required_cols_2 = {code_col2, lat_col2, long_col2}
80
+
81
+ if not required_cols_1.issubset(df1.columns):
82
+ raise ValueError(
83
+ f"df1 is missing required columns: {required_cols_1 - set(df1.columns)}"
84
+ )
85
+ if not required_cols_2.issubset(df2.columns):
86
+ raise ValueError(
87
+ f"df2 is missing required columns: {required_cols_2 - set(df2.columns)}"
88
+ )
89
+
90
+ # Convert to list of tuples for vectorized operations
91
+ coords1 = df1[[lat_col1, long_col1]].apply(tuple, axis=1).tolist()
92
+ coords2 = df2[[lat_col2, long_col2]].apply(tuple, axis=1).tolist()
93
+
94
+ # Calculate all pairwise distances
95
+ distances = []
96
+ for i, coord1 in enumerate(coords1):
97
+ for j, coord2 in enumerate(coords2):
98
+ try:
99
+ distance_km = geodesic(coord1, coord2).kilometers
100
+ distances.append(
101
+ {
102
+ **df1.iloc[i].to_dict(),
103
+ **{f"{col}_Dataset2": df2.iloc[j][col] for col in df2.columns},
104
+ "Distance_km": distance_km,
105
+ }
106
+ )
107
+ except ValueError as e:
108
+ warnings.warn(
109
+ f"Skipping invalid coordinates: {coord1} or {coord2}: {e}"
110
+ )
111
+ continue
112
+
113
+ if not distances:
114
+ raise ValueError("No valid coordinate pairs were processed")
115
+
116
+ df_distances = pd.DataFrame(distances)
117
+
118
+ # Find closest matches
119
+ df_closest = df_distances.loc[
120
+ df_distances.groupby(code_col1)["Distance_km"].idxmin()
121
+ ]
122
+
123
+ # Filter by minimum distance
124
+ df_closest_min_distance = df_distances[df_distances["Distance_km"] < min_distance]
125
+
126
+ return df_distances, df_closest, df_closest_min_distance
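The pairwise loop costs O(len(df1) x len(df2)) geodesic calls, so it suits modest tables. A usage sketch with made-up coordinates and column names:

sites = pd.DataFrame({"code": ["S1"], "lat": [12.65], "lon": [-8.00]})
cands = pd.DataFrame({"code2": ["C1", "C2"], "lat2": [12.66, 12.70], "lon2": [-8.00, -8.05]})
all_d, closest, near = calculate_distances(
    sites, cands, "code", "lat", "lon", "code2", "lat2", "lon2", min_distance=2.0
)
# 'closest' keeps, per source code, the candidate with the smallest Distance_km;
# 'near' keeps every pair closer than 2 km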
utils/utils_vars.py ADDED
@@ -0,0 +1,243 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ # url = "https://raw.githubusercontent.com/DavMelchi/STORAGE/refs/heads/main/physical_db/physical_database.csv"
5
+ url = r"./physical_db/physical_database.csv"
6
+
7
+
8
+ def get_physical_db():
9
+ """
10
+ Reads the physical_database.csv file from the physical_db directory and
11
+ returns a pandas DataFrame containing only the columns 'Code_Sector',
12
+ 'Azimut', 'Longitude', 'Latitude', and 'Hauteur'.
13
+
14
+ Returns:
15
+ pd.DataFrame: A DataFrame containing the filtered columns.
16
+ """
17
+ physical = pd.read_csv(url)
18
+ physical = physical[
19
+ [
20
+ "Code_Sector",
21
+ "Azimut",
22
+ "Longitude",
23
+ "Latitude",
24
+ "Hauteur",
25
+ "City",
26
+ "Adresse",
27
+ "Commune",
28
+ "Cercle",
29
+ ]
30
+ ]
31
+ return physical
32
+
33
+
34
+ class UtilsVars:
35
+ sector_mapping = {
36
+ 4: 1,
37
+ 5: 2,
38
+ 6: 3,
39
+ 11: 1,
40
+ 12: 2,
41
+ 13: 3,
42
+ 71: 1,
43
+ 72: 2,
44
+ 73: 3,
45
+ 81: 1,
46
+ 82: 2,
47
+ 83: 3,
48
+ }
49
+ type_cellule = {1: "Macro Cell 1800", 0: "Macro Cell 900"}
50
+ oml_band_frequence = {1: "OML BAND GSM 1800", 0: "OML BAND GSM 900"}
51
+ gsm_band = {1: "G1800", 0: "G900"}
52
+ configuration_schema = {1: "EGPRS 1800", 0: "EGPRS 900"}
53
+ channeltype_mapping = {4: "BCCH", 3: "TRX_TCH"}
54
+ oml_lte_freq_band = {
55
+ "L1800": "OML E-UTRA Band 3 - 20MHz",
56
+ "L800": "OML E-UTRA Band 20 - 20MHz",
57
+ "L2300": "OML E-UTRA Band 43 - 20MHz",
58
+ "L2600": "OML E-UTRA Band 7 - 20MHz",
59
+ "L700": "OML E-UTRA Band 28 - 20MHz",
60
+ }
61
+ porteuse_mapping = {
62
+ 3004: "OML UTRA Band VIII",
63
+ 3006: "OML UTRA Band VIII",
64
+ 10812: "OML UTRA Band I",
65
+ 10787: "OML UTRA Band I",
66
+ 10837: "OML UTRA Band I",
67
+ }
68
+ color_mapping = {
69
+ "U900": "7fff0000",
70
+ "U2100": "7f00ff00",
71
+ "G900": "7fff0000",
72
+ "G1800": "7f00ff00",
73
+ "L800": "7fff0000",
74
+ "L1800": "7f00ff00",
75
+ "L2300": "7f00ffff",
76
+ "L2600": "7f0000ff",
77
+ "L700": "7fff00ff",
78
+ }
79
+ size_mapping = {
80
+ "U900": 100,
81
+ "U2100": 120,
82
+ "G900": 100,
83
+ "G1800": 120,
84
+ "L800": 120,
85
+ "L1800": 140,
86
+ "L2300": 100,
87
+ "L2600": 90,
88
+ "L700": 80,
89
+ }
90
+ lte_band = {
91
+ 1786: "L1800",
92
+ 6350: "L800",
93
+ 3050: "L2600",
94
+ 38750: "L2300",
95
+ 1761: "L1800",
96
+ 9260: "L700",
97
+ }
98
+ wcdma_band = {
99
+ 3004: "U900",
100
+ 3006: "U900",
101
+ 10787: "U2100",
102
+ 10837: "U2100",
103
+ 10812: "U2100",
104
+ }
105
+ bsc_name = {
106
+ 403698: "MBSCTST",
107
+ 403699: "MBSC01",
108
+ 403701: "MBSC04",
109
+ 403702: "MBSC03",
110
+ 403703: "MBSC02",
111
+ 406283: "MBSKTL01",
112
+ 406284: "MBSSEG01",
113
+ 406308: "MBSSK0S1",
114
+ 406309: "ASBSCMSC3",
115
+ }
116
+ final_lte_database = ""
117
+ final_gsm_database = ""
118
+ final_wcdma_database = ""
119
+ final_trx_database = ""
120
+ final_mrbts_database = ""
121
+ final_invunit_database = ""
122
+ final_mal_database = ""
123
+ gsm_dfs = []
124
+ wcdma_dfs = []
125
+ lte_dfs = []
126
+ all_db_dfs = []
127
+ all_db_dfs_names = []
128
+ final_all_database = None
129
+ atoll_dfs = []
130
+ final_atoll_database = None
131
+ final_nice_database = None
132
+ neighbors_database = ""
133
+ file_path = ""
134
+ gsm_kml_file = None
135
+ wcdma_kml_file = None
136
+ lte_kml_file = None
137
+ adjl_database = None
138
+ # physisal_db = get_physical_db()
139
+
140
+
141
+ def get_band(text):
142
+ """
143
+ Extract the band from the given string.
144
+
145
+ Parameters
146
+ ----------
147
+ text : str
148
+ The string to extract the band from.
149
+
150
+ Returns
151
+ -------
152
+ str or np.nan
153
+ The extracted band, or NaN if the text was not a string or did not contain
154
+ any of the recognized bands (L1800, L2300, L800, L2600, L700).
155
+ """
156
+ if isinstance(text, str): # Check if text is a string
157
+ if "L1800" in text:
158
+ return "L1800"
159
+ elif "L2300" in text:
160
+ return "L2300"
161
+ elif "L800" in text:
162
+ return "L800"
163
+ elif "L2600" in text:
164
+ return "L2600"
165
+ elif "L700" in text:
166
+ return "L700"
167
+ return np.nan # or return None
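Illustrative behaviour; numeric carrier codes are handled separately via UtilsVars.lte_band:

get_band("SITE1_L1800_SECT2")  # -> "L1800"
get_band(1786)                 # -> nan (non-string input)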
168
+
169
+
170
+ def clean_bands(bands):
+ """Drop 'nan' fragments from a '/'-separated band string (e.g. 'L1800/nan' -> 'L1800')."""
171
+ if pd.isna(bands):
172
+ return None
173
+ parts = [p for p in bands.split("/") if p != "nan"]
174
+ return "/".join(parts) if parts else None
175
+
176
+
177
+ class GsmAnalysisData:
178
+ total_number_of_bsc = 0
179
+ total_number_of_cell = 0
180
+ number_of_site = 0
181
+ number_of_cell_per_bsc = pd.DataFrame()
182
+ number_of_site_per_bsc = pd.DataFrame()
183
+ number_of_bts_name_empty = 0
184
+ number_of_bcf_name_empty = 0
185
+ number_of_bcch_empty = 0
186
+ bts_administate_distribution = pd.DataFrame()
187
+ trx_administate_distribution = pd.DataFrame()
188
+ number_of_trx_per_bsc = pd.DataFrame()
189
+ number_of_cell_per_lac = pd.DataFrame()
190
+ number_of_site_per_lac = pd.DataFrame()
191
+ trx_frequency_distribution = pd.DataFrame()
192
+
193
+
194
+ class WcdmaAnalysisData:
195
+ total_number_of_rnc = 0
196
+ total_number_of_wcel = 0
197
+ number_of_site = 0
198
+ number_of_site_per_rnc = 0
199
+ number_of_cell_per_rnc = pd.DataFrame()
200
+ number_of_empty_wbts_name = 0
201
+ number_of_empty_wcel_name = 0
202
+ wcel_administate_distribution = pd.DataFrame()
203
+ psc_distribution = pd.DataFrame()
204
+ number_of_cell_per_lac = pd.DataFrame()
205
+ number_of_site_per_lac = pd.DataFrame()
206
+
207
+
208
+ class LteFddAnalysisData:
209
+ total_number_of_lncel = 0
210
+ total_number_of_site = 0
211
+ number_of_empty_lncel_name = 0
212
+ number_of_empty_lncel_cellname = 0
213
+ number_of_empty_lnbts_name = 0
214
+ number_of_cell_per_band = pd.DataFrame()
215
+ phycellid_distribution = pd.DataFrame()
216
+ rootsequenceindex_distribution = pd.DataFrame()
217
+ lncel_administate_distribution = pd.DataFrame()
218
+ number_of_cell_per_tac = pd.DataFrame()
219
+
220
+
221
+ class LteTddAnalysisData:
222
+ total_number_of_lncel = 0
223
+ total_number_of_site = 0
224
+ number_of_empty_lncel_name = 0
225
+ number_of_empty_lncel_cellname = 0
226
+ number_of_empty_lnbts_name = 0
227
+ number_of_cell_per_band = pd.DataFrame()
228
+ phycellid_distribution = pd.DataFrame()
229
+ rootsequenceindex_distribution = pd.DataFrame()
230
+ lncel_administate_distribution = pd.DataFrame()
231
+ number_of_cell_per_tac = pd.DataFrame()
232
+
233
+
234
+ class SiteAnalysisData:
235
+ total_number_of_site = 0
236
+ total_munber_of_gsm_site = 0
237
+ total_number_of_wcdma_site = 0
238
+ total_number_of_lte_site = 0
239
+ gsm_bands_distribution = pd.DataFrame()
240
+ wcdma_bands_distribution = pd.DataFrame()
241
+ lte_bands_distribution = pd.DataFrame()
242
+ all_bands_distribution = pd.DataFrame()
243
+ number_of_trx_per_site_distribution = pd.DataFrame()