DavMelchi committed on
Commit
a21cda0
·
1 Parent(s): 7cd89f2

Add 3G and LTE cell availability analysis with SLA comparison, multi-RAT site-level reporting, and export functionality

Browse files
Files changed (1) hide show
  1. apps/kpi_analysis/trafic_analysis.py +383 -10
apps/kpi_analysis/trafic_analysis.py CHANGED
@@ -98,9 +98,24 @@ def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
98
  },
99
  inplace=True,
100
  )
101
- df = df.groupby(["date", "ID", "code"], as_index=False)[
102
- ["3g_voice_trafic", "3g_data_trafic"]
103
- ].sum()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  return df
105
 
106
 
@@ -119,7 +134,14 @@ def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
119
  )
120
  df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
121
  df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
122
- df = df.groupby(["date", "ID", "code"], as_index=False)[["lte_data_trafic"]].sum()
 
 
 
 
 
 
 
123
  return df
124
 
125
 
@@ -149,14 +171,15 @@ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_r
149
  if col not in df:
150
  df[col] = 0
151
 
152
- tch_mask = None
153
- if "2g_tch_avail" in df.columns:
154
- tch_mask = df["2g_tch_avail"].notna()
 
155
 
156
  df.fillna(0, inplace=True)
157
 
158
- if tch_mask is not None:
159
- df.loc[~tch_mask, "2g_tch_avail"] = np.nan
160
 
161
  df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
162
  df["total_data_trafic"] = (
@@ -366,6 +389,266 @@ def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
366
  return summary_df, site_pivot
367
 
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
370
  df["date"] = pd.to_datetime(df["date"])
371
 
@@ -439,9 +722,13 @@ with number_of_top_trafic_sites_col:
439
  "Number of top traffic sites", value=25
440
  )
441
 
442
- sla_2g_col, _ = st.columns(2)
443
  with sla_2g_col:
444
  sla_2g = st.number_input("2G TCH availability SLA (%)", value=98.0)
 
 
 
 
445
 
446
  if len(pre_range) != 2 or len(post_range) != 2:
447
  st.warning("⚠️ Please select 2 dates for each period (pre and post).")
@@ -505,6 +792,40 @@ if st.button(" Run Analysis"):
505
  "2G TCH availability KPI not found in input report or no data for selected periods."
506
  )
507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  TraficAnalysis.last_period_df = last_period
509
 
510
  #######################################################################################################"""
@@ -674,6 +995,48 @@ if TraficAnalysis.last_period_df is not None:
674
 
675
  st.plotly_chart(fig)
676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  final_dfs = convert_dfs(
678
  [
679
  full_df,
@@ -681,6 +1044,11 @@ if TraficAnalysis.last_period_df is not None:
681
  avg_pre_post_analysis,
682
  monthly_voice_df,
683
  monthly_data_df,
 
 
 
 
 
684
  ],
685
  [
686
  "Global_Trafic_Analysis",
@@ -688,6 +1056,11 @@ if TraficAnalysis.last_period_df is not None:
688
  "Avg_pre_post_analysis",
689
  "Monthly_voice_analysis",
690
  "Monthly_data_analysis",
 
 
 
 
 
691
  ],
692
  )
693
  # 📥 Bouton de téléchargement
 
98
  },
99
  inplace=True,
100
  )
101
+
102
+ kpi_col = None
103
+ for col in df.columns:
104
+ if "cell availability" in str(col).lower():
105
+ kpi_col = col
106
+ break
107
+
108
+ if kpi_col is not None:
109
+ df["3g_cell_avail"] = pd.to_numeric(df[kpi_col], errors="coerce")
110
+
111
+ agg_dict = {
112
+ "3g_voice_trafic": "sum",
113
+ "3g_data_trafic": "sum",
114
+ }
115
+ if "3g_cell_avail" in df.columns:
116
+ agg_dict["3g_cell_avail"] = "mean"
117
+
118
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
119
  return df
120
 
121
 
 
134
  )
135
  df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
136
  df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
137
+ if "Cell Avail excl BLU" in df.columns:
138
+ df["lte_cell_avail"] = pd.to_numeric(df["Cell Avail excl BLU"], errors="coerce")
139
+
140
+ agg_dict = {"lte_data_trafic": "sum"}
141
+ if "lte_cell_avail" in df.columns:
142
+ agg_dict["lte_cell_avail"] = "mean"
143
+
144
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
145
  return df
146
 
147
 
 
171
  if col not in df:
172
  df[col] = 0
173
 
174
+ kpi_masks = {}
175
+ for kpi_col in ["2g_tch_avail", "3g_cell_avail", "lte_cell_avail"]:
176
+ if kpi_col in df.columns:
177
+ kpi_masks[kpi_col] = df[kpi_col].notna()
178
 
179
  df.fillna(0, inplace=True)
180
 
181
+ for kpi_col, mask in kpi_masks.items():
182
+ df.loc[~mask, kpi_col] = np.nan
183
 
184
  df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
185
  df["total_data_trafic"] = (
 
389
  return summary_df, site_pivot
390
 
391
 
392
def analyze_3g_availability(df: pd.DataFrame, sla_3g: float):
    """Compare per-site 3G cell availability between the pre and post periods.

    Rows of ``df`` that have a non-null ``3g_cell_avail`` and whose ``period``
    is ``"pre"`` or ``"post"`` are averaged per ``code`` and period; each
    average is then compared with ``sla_3g``.

    Args:
        df: Frame providing the ``code``, ``period`` and ``3g_cell_avail``
            columns.
        sla_3g: Availability threshold; a value ``>= sla_3g`` meets the SLA.

    Returns:
        ``(summary_df, site_pivot)``. ``site_pivot`` has one row per ``code``
        with ``cell_avail_pre``, ``cell_avail_post``, ``cell_avail_diff``
        (post minus pre) and the boolean ``pre_ok_vs_sla`` /
        ``post_ok_vs_sla`` flags. ``summary_df`` has one row per period with
        distribution statistics and SLA counts. ``(None, None)`` is returned
        when a required column is missing or no pre/post row carries a value.
    """
    avail_col = "3g_cell_avail"

    if avail_col not in df.columns or "period" not in df.columns:
        return None, None

    has_value = df[avail_col].notna()
    in_scope = df["period"].isin(["pre", "post"])
    df_3g = df[has_value & in_scope]

    if df_3g.empty:
        return None, None

    site_pivot = (
        df_3g.groupby(["code", "period"])[avail_col]
        .mean()
        .unstack()
        .rename(columns={"pre": "cell_avail_pre", "post": "cell_avail_post"})
    )

    # A period without any data gets a float NaN column (not pd.NA): this
    # keeps the column float64, so the difference stays numeric and
    # DataFrame.round() applied by callers still rounds it.
    for col_name in ("cell_avail_pre", "cell_avail_post"):
        if col_name not in site_pivot.columns:
            site_pivot[col_name] = np.nan

    site_pivot["cell_avail_diff"] = (
        site_pivot["cell_avail_post"] - site_pivot["cell_avail_pre"]
    )
    # Comparisons against NaN are False, so sites without data are "not OK".
    site_pivot["pre_ok_vs_sla"] = site_pivot["cell_avail_pre"] >= sla_3g
    site_pivot["post_ok_vs_sla"] = site_pivot["cell_avail_post"] >= sla_3g

    site_pivot = site_pivot.reset_index()

    summary_rows = []
    for period_label, col_name in (
        ("pre", "cell_avail_pre"),
        ("post", "cell_avail_post"),
    ):
        series = site_pivot[col_name].dropna()
        total_cells = int(series.shape[0])
        has_data = total_cells > 0
        cells_ge_sla = int((series >= sla_3g).sum())
        cells_lt_sla = int((series < sla_3g).sum())
        summary_rows.append(
            {
                "period": period_label,
                "cells": total_cells,
                # NaN (not pd.NA) for an empty period keeps these columns
                # float64 in the resulting frame.
                "avg_availability": series.mean() if has_data else np.nan,
                "median_availability": series.median() if has_data else np.nan,
                "p05_availability": (
                    series.quantile(0.05) if has_data else np.nan
                ),
                "p95_availability": (
                    series.quantile(0.95) if has_data else np.nan
                ),
                "min_availability": series.min() if has_data else np.nan,
                "max_availability": series.max() if has_data else np.nan,
                "cells_ge_sla": cells_ge_sla,
                "cells_lt_sla": cells_lt_sla,
                "pct_cells_ge_sla": (
                    cells_ge_sla / total_cells * 100 if has_data else np.nan
                ),
            }
        )

    summary_df = pd.DataFrame(summary_rows)

    return summary_df, site_pivot
468
+
469
+
470
def analyze_lte_availability(df: pd.DataFrame, sla_lte: float):
    """Compare per-site LTE cell availability between the pre and post periods.

    Rows of ``df`` that have a non-null ``lte_cell_avail`` and whose ``period``
    is ``"pre"`` or ``"post"`` are averaged per ``code`` and period; each
    average is then compared with ``sla_lte``.

    Args:
        df: Frame providing the ``code``, ``period`` and ``lte_cell_avail``
            columns.
        sla_lte: Availability threshold; a value ``>= sla_lte`` meets the SLA.

    Returns:
        ``(summary_df, site_pivot)``. ``site_pivot`` has one row per ``code``
        with ``lte_avail_pre``, ``lte_avail_post``, ``lte_avail_diff``
        (post minus pre) and the boolean ``pre_ok_vs_sla`` /
        ``post_ok_vs_sla`` flags. ``summary_df`` has one row per period with
        distribution statistics and SLA counts. ``(None, None)`` is returned
        when a required column is missing or no pre/post row carries a value.
    """
    avail_col = "lte_cell_avail"

    if avail_col not in df.columns or "period" not in df.columns:
        return None, None

    has_value = df[avail_col].notna()
    in_scope = df["period"].isin(["pre", "post"])
    df_lte = df[has_value & in_scope]

    if df_lte.empty:
        return None, None

    site_pivot = (
        df_lte.groupby(["code", "period"])[avail_col]
        .mean()
        .unstack()
        .rename(columns={"pre": "lte_avail_pre", "post": "lte_avail_post"})
    )

    # A period without any data gets a float NaN column (not pd.NA): this
    # keeps the column float64, so the difference stays numeric and
    # DataFrame.round() applied by callers still rounds it.
    for col_name in ("lte_avail_pre", "lte_avail_post"):
        if col_name not in site_pivot.columns:
            site_pivot[col_name] = np.nan

    site_pivot["lte_avail_diff"] = (
        site_pivot["lte_avail_post"] - site_pivot["lte_avail_pre"]
    )
    # Comparisons against NaN are False, so sites without data are "not OK".
    site_pivot["pre_ok_vs_sla"] = site_pivot["lte_avail_pre"] >= sla_lte
    site_pivot["post_ok_vs_sla"] = site_pivot["lte_avail_post"] >= sla_lte

    site_pivot = site_pivot.reset_index()

    summary_rows = []
    for period_label, col_name in (
        ("pre", "lte_avail_pre"),
        ("post", "lte_avail_post"),
    ):
        series = site_pivot[col_name].dropna()
        total_cells = int(series.shape[0])
        has_data = total_cells > 0
        cells_ge_sla = int((series >= sla_lte).sum())
        cells_lt_sla = int((series < sla_lte).sum())
        summary_rows.append(
            {
                "period": period_label,
                "cells": total_cells,
                # NaN (not pd.NA) for an empty period keeps these columns
                # float64 in the resulting frame.
                "avg_availability": series.mean() if has_data else np.nan,
                "median_availability": series.median() if has_data else np.nan,
                "p05_availability": (
                    series.quantile(0.05) if has_data else np.nan
                ),
                "p95_availability": (
                    series.quantile(0.95) if has_data else np.nan
                ),
                "min_availability": series.min() if has_data else np.nan,
                "max_availability": series.max() if has_data else np.nan,
                "cells_ge_sla": cells_ge_sla,
                "cells_lt_sla": cells_lt_sla,
                "pct_cells_ge_sla": (
                    cells_ge_sla / total_cells * 100 if has_data else np.nan
                ),
            }
        )

    summary_df = pd.DataFrame(summary_rows)

    return summary_df, site_pivot
546
+
547
+
548
def analyze_multirat_availability(
    df: pd.DataFrame, sla_2g: float, sla_3g: float, sla_lte: float
):
    """Build a per-site availability table across 2G, 3G and LTE.

    For every availability KPI present in ``df`` (``2g_tch_avail``,
    ``3g_cell_avail``, ``lte_cell_avail``) the mean per ``code`` is computed
    separately for the ``"pre"`` and ``"post"`` periods. The post-period mean
    of each RAT is compared with its SLA and the per-RAT results are folded
    into a textual status.

    Args:
        df: Frame providing ``code`` and ``period`` plus any of the KPI
            columns above; an optional ``City`` column is carried over.
        sla_2g: SLA threshold applied to the 2G post-period mean.
        sla_3g: SLA threshold applied to the 3G post-period mean.
        sla_lte: SLA threshold applied to the LTE post-period mean.

    Returns:
        A frame with one row per ``code`` holding ``<rat>_avail_pre`` /
        ``<rat>_avail_post`` means, ``ok_<rat>_post`` flags (``pd.NA`` where
        the post mean is missing) and ``post_multirat_status``; ``None`` when
        ``period`` or every KPI column is absent.
    """
    if "period" not in df.columns:
        return None

    # (source KPI column, output column prefix, label used in status, SLA)
    rat_specs = [
        spec
        for spec in (
            ("2g_tch_avail", "2g", "2G", sla_2g),
            ("3g_cell_avail", "3g", "3G", sla_3g),
            ("lte_cell_avail", "lte", "LTE", sla_lte),
        )
        if spec[0] in df.columns
    ]
    if not rat_specs:
        return None

    agg_dict = {kpi_col: "mean" for kpi_col, _, _, _ in rat_specs}

    def _site_means(period: str) -> pd.DataFrame:
        # Mean of every available KPI per site for one period, with the
        # columns renamed to "<prefix>_avail_<period>".
        means = (
            df[df["period"] == period]
            .groupby("code", as_index=False)
            .agg(agg_dict)
        )
        return means.rename(
            columns={
                kpi_col: f"{prefix}_avail_{period}"
                for kpi_col, prefix, _, _ in rat_specs
            }
        )

    # Outer merge keeps sites that only have data in one of the two periods.
    multi = pd.merge(
        _site_means("pre"), _site_means("post"), on="code", how="outer"
    )

    if "City" in df.columns:
        city_df = df[["code", "City"]].drop_duplicates("code")
        multi = pd.merge(multi, city_df, on="code", how="left")

    # OK / not-OK flag per RAT on the post period; NA where there is no value
    # so that "no data" is not reported as an SLA breach.
    flag_cols = []
    for _, prefix, label, sla in rat_specs:
        post_values = multi[f"{prefix}_avail_post"]
        flag_col = f"ok_{prefix}_post"
        multi[flag_col] = (post_values >= sla).where(
            post_values.notna(), pd.NA
        )
        flag_cols.append((label, flag_col))

    def _classify(flags) -> str:
        # flags: one OK/not-OK/NA value per entry of flag_cols, same order.
        known = [
            (label, bool(flag))
            for (label, _), flag in zip(flag_cols, flags)
            if not pd.isna(flag)
        ]
        if not known:
            return "No RAT data"
        bad_rats = [label for label, ok in known if not ok]
        if not bad_rats:
            return "OK all RAT"
        if len(bad_rats) == 1:
            return f"Degraded {bad_rats[0]} only"
        return "Degraded multi-RAT (" + ",".join(bad_rats) + ")"

    flag_names = [flag_col for _, flag_col in flag_cols]
    multi["post_multirat_status"] = [
        _classify(flags)
        for flags in multi[flag_names].itertuples(index=False, name=None)
    ]

    # Order columns for readability
    preferred_order = [
        "code",
        "City",
        "2g_avail_pre",
        "2g_avail_post",
        "3g_avail_pre",
        "3g_avail_post",
        "lte_avail_pre",
        "lte_avail_post",
        "ok_2g_post",
        "ok_3g_post",
        "ok_lte_post",
        "post_multirat_status",
    ]
    ordered_cols = [col for col in preferred_order if col in multi.columns]
    remaining_cols = [col for col in multi.columns if col not in ordered_cols]
    multi = multi[ordered_cols + remaining_cols]

    return multi
650
+
651
+
652
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
653
  df["date"] = pd.to_datetime(df["date"])
654
 
 
722
  "Number of top traffic sites", value=25
723
  )
724
 
725
+ sla_2g_col, sla_3g_col, sla_lte_col = st.columns(3)
726
  with sla_2g_col:
727
  sla_2g = st.number_input("2G TCH availability SLA (%)", value=98.0)
728
+ with sla_3g_col:
729
+ sla_3g = st.number_input("3G Cell availability SLA (%)", value=98.0)
730
+ with sla_lte_col:
731
+ sla_lte = st.number_input("LTE Cell availability SLA (%)", value=98.0)
732
 
733
  if len(pre_range) != 2 or len(post_range) != 2:
734
  st.warning("⚠️ Please select 2 dates for each period (pre and post).")
 
792
  "2G TCH availability KPI not found in input report or no data for selected periods."
793
  )
794
 
795
+ summary_3g_avail, site_3g_avail = analyze_3g_availability(full_df, sla_3g)
796
+ if summary_3g_avail is not None:
797
+ st.subheader("3G - Cell Availability vs SLA")
798
+ st.write(f"SLA target 3G Cell availability: {sla_3g}%")
799
+ st.dataframe(summary_3g_avail.round(2))
800
+
801
+ st.subheader("3G - Cell Availability by site (worst 25 by post-period)")
802
+ worst_sites_3g = site_3g_avail.sort_values("cell_avail_post").head(25)
803
+ st.dataframe(worst_sites_3g.round(2))
804
+ else:
805
+ st.info(
806
+ "3G Cell Availability KPI not found in input report or no data for selected periods."
807
+ )
808
+
809
+ summary_lte_avail, site_lte_avail = analyze_lte_availability(full_df, sla_lte)
810
+ if summary_lte_avail is not None:
811
+ st.subheader("LTE - Cell Availability vs SLA")
812
+ st.write(f"SLA target LTE Cell availability: {sla_lte}%")
813
+ st.dataframe(summary_lte_avail.round(2))
814
+
815
+ st.subheader("LTE - Cell Availability by site (worst 25 by post-period)")
816
+ worst_sites_lte = site_lte_avail.sort_values("lte_avail_post").head(25)
817
+ st.dataframe(worst_sites_lte.round(2))
818
+ else:
819
+ st.info(
820
+ "LTE Cell Availability KPI not found in input report or no data for selected periods."
821
+ )
822
+
823
+ # Multi-RAT availability view
824
+ multi_rat_df = analyze_multirat_availability(full_df, sla_2g, sla_3g, sla_lte)
825
+ if multi_rat_df is not None:
826
+ st.subheader("Multi-RAT Availability by site (post-period)")
827
+ st.dataframe(multi_rat_df.round(2))
828
+
829
  TraficAnalysis.last_period_df = last_period
830
 
831
  #######################################################################################################"""
 
995
 
996
  st.plotly_chart(fig)
997
 
998
+ # Prepare availability DataFrames for export (fallback to empty if KPI missing)
999
+ summary_frames = []
1000
+ if "summary_2g_avail" in locals() and summary_2g_avail is not None:
1001
+ tmp = summary_2g_avail.copy()
1002
+ tmp["RAT"] = "2G"
1003
+ summary_frames.append(tmp)
1004
+ if "summary_3g_avail" in locals() and summary_3g_avail is not None:
1005
+ tmp = summary_3g_avail.copy()
1006
+ tmp["RAT"] = "3G"
1007
+ summary_frames.append(tmp)
1008
+ if "summary_lte_avail" in locals() and summary_lte_avail is not None:
1009
+ tmp = summary_lte_avail.copy()
1010
+ tmp["RAT"] = "LTE"
1011
+ summary_frames.append(tmp)
1012
+
1013
+ if summary_frames:
1014
+ availability_summary_all = pd.concat(summary_frames, ignore_index=True)
1015
+ else:
1016
+ availability_summary_all = pd.DataFrame()
1017
+
1018
+ export_site_2g = (
1019
+ site_2g_avail
1020
+ if "site_2g_avail" in locals() and site_2g_avail is not None
1021
+ else pd.DataFrame()
1022
+ )
1023
+ export_site_3g = (
1024
+ site_3g_avail
1025
+ if "site_3g_avail" in locals() and site_3g_avail is not None
1026
+ else pd.DataFrame()
1027
+ )
1028
+ export_site_lte = (
1029
+ site_lte_avail
1030
+ if "site_lte_avail" in locals() and site_lte_avail is not None
1031
+ else pd.DataFrame()
1032
+ )
1033
+
1034
+ export_multi_rat = (
1035
+ multi_rat_df
1036
+ if "multi_rat_df" in locals() and multi_rat_df is not None
1037
+ else pd.DataFrame()
1038
+ )
1039
+
1040
  final_dfs = convert_dfs(
1041
  [
1042
  full_df,
 
1044
  avg_pre_post_analysis,
1045
  monthly_voice_df,
1046
  monthly_data_df,
1047
+ availability_summary_all,
1048
+ export_site_2g,
1049
+ export_site_3g,
1050
+ export_site_lte,
1051
+ export_multi_rat,
1052
  ],
1053
  [
1054
  "Global_Trafic_Analysis",
 
1056
  "Avg_pre_post_analysis",
1057
  "Monthly_voice_analysis",
1058
  "Monthly_data_analysis",
1059
+ "Availability_Summary_All_RAT",
1060
+ "TwoG_Availability_By_Site",
1061
+ "ThreeG_Availability_By_Site",
1062
+ "LTE_Availability_By_Site",
1063
+ "MultiRAT_Availability_By_Site",
1064
  ],
1065
  )
1066
  # 📥 Bouton de téléchargement