DavMelchi committed on
Commit
7cd89f2
·
1 Parent(s): e20c43b

Add 2G TCH availability analysis with SLA comparison and site-level reporting

Browse files
Files changed (1) hide show
  1. apps/kpi_analysis/trafic_analysis.py +134 -9
apps/kpi_analysis/trafic_analysis.py CHANGED
@@ -3,6 +3,7 @@ import zipfile
3
  from datetime import datetime
4
  from pathlib import Path
5
 
 
6
  import pandas as pd
7
  import plotly.express as px
8
  import streamlit as st
@@ -54,20 +55,37 @@ def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
54
  df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
55
  df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
56
  df["code"] = df["BCF name"].apply(extract_code)
 
 
 
57
  date_format = (
58
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
59
  )
60
  df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
61
  df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
62
- df = df.groupby(["date", "ID", "code"], as_index=False)[
63
- ["2g_data_trafic", "2g_voice_trafic"]
64
- ].sum()
 
 
 
 
 
 
 
 
 
 
 
65
  return df
66
 
67
 
68
  def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
69
  df = df[df["WBTS name"].str.len() >= 10].copy()
70
  df["code"] = df["WBTS name"].apply(extract_code)
 
 
 
71
  date_format = (
72
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
73
  )
@@ -93,6 +111,9 @@ def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
93
  + df["4G/LTE UL Traffic Volume (GBytes)"]
94
  )
95
  df["code"] = df["LNBTS name"].apply(extract_code)
 
 
 
96
  date_format = (
97
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
98
  )
@@ -128,8 +149,15 @@ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_r
128
  if col not in df:
129
  df[col] = 0
130
 
 
 
 
 
131
  df.fillna(0, inplace=True)
132
 
 
 
 
133
  df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
134
  df["total_data_trafic"] = (
135
  df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
@@ -260,6 +288,84 @@ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_r
260
  return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
261
 
262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
264
  df["date"] = pd.to_datetime(df["date"])
265
 
@@ -333,6 +439,10 @@ with number_of_top_trafic_sites_col:
333
  "Number of top traffic sites", value=25
334
  )
335
 
 
 
 
 
336
  if len(pre_range) != 2 or len(post_range) != 2:
337
  st.warning("⚠️ Please select 2 dates for each period (pre and post).")
338
  st.stop()
@@ -347,10 +457,10 @@ if pre_range == post_range:
347
 
348
  # Warning if pre and post are overlapping
349
  if pre_range[0] < post_range[0] and pre_range[1] > post_range[1]:
350
- st.warning("⚠️ Pre and post periode are overlapping.")
351
  st.stop()
352
 
353
- if st.button("🔍 Run Analysis"):
354
 
355
  df_2g = read_uploaded_file(two_g_file)
356
  df_3g = read_uploaded_file(three_g_file)
@@ -376,13 +486,28 @@ if st.button("🔍 Run Analysis"):
376
  full_df["week"] = full_df["date"].dt.isocalendar().week
377
  full_df["year"] = full_df["date"].dt.isocalendar().year
378
 
379
- # 🔍 Display Summary
380
- st.success(" Analysis completed")
381
- st.subheader("📈 Summary Analysis Pre / Post")
382
  st.dataframe(sum_pre_post_analysis)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  TraficAnalysis.last_period_df = last_period
384
 
385
- #######################################################################################################""
386
 
387
  #######################################################################################################
388
  if TraficAnalysis.last_period_df is not None:
 
3
  from datetime import datetime
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
  import pandas as pd
8
  import plotly.express as px
9
  import streamlit as st
 
55
  df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
56
  df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
57
  df["code"] = df["BCF name"].apply(extract_code)
58
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
59
+ df = df[df["code"].notna()]
60
+ df["code"] = df["code"].astype(int)
61
  date_format = (
62
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
63
  )
64
  df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
65
  df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
66
+
67
+ if "TCH availability ratio" in df.columns:
68
+ df["2g_tch_avail"] = pd.to_numeric(
69
+ df["TCH availability ratio"], errors="coerce"
70
+ )
71
+
72
+ agg_dict = {
73
+ "2g_data_trafic": "sum",
74
+ "2g_voice_trafic": "sum",
75
+ }
76
+ if "2g_tch_avail" in df.columns:
77
+ agg_dict["2g_tch_avail"] = "mean"
78
+
79
+ df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
80
  return df
81
 
82
 
83
  def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
84
  df = df[df["WBTS name"].str.len() >= 10].copy()
85
  df["code"] = df["WBTS name"].apply(extract_code)
86
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
87
+ df = df[df["code"].notna()]
88
+ df["code"] = df["code"].astype(int)
89
  date_format = (
90
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
91
  )
 
111
  + df["4G/LTE UL Traffic Volume (GBytes)"]
112
  )
113
  df["code"] = df["LNBTS name"].apply(extract_code)
114
+ df["code"] = pd.to_numeric(df["code"], errors="coerce")
115
+ df = df[df["code"].notna()]
116
+ df["code"] = df["code"].astype(int)
117
  date_format = (
118
  "%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
119
  )
 
149
  if col not in df:
150
  df[col] = 0
151
 
152
+ tch_mask = None
153
+ if "2g_tch_avail" in df.columns:
154
+ tch_mask = df["2g_tch_avail"].notna()
155
+
156
  df.fillna(0, inplace=True)
157
 
158
+ if tch_mask is not None:
159
+ df.loc[~tch_mask, "2g_tch_avail"] = np.nan
160
+
161
  df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
162
  df["total_data_trafic"] = (
163
  df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
 
288
  return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
289
 
290
 
291
def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
    """Compare 2G TCH availability between the pre and post periods against an SLA.

    Rows are kept only when ``2g_tch_avail`` is not null and ``period`` is
    ``"pre"`` or ``"post"``. Availability is then averaged per ``code`` and
    period, and each per-code average is compared with ``sla_2g``.

    Args:
        df: Frame expected to hold ``code``, ``period`` and ``2g_tch_avail``
            columns. It is not modified.
        sla_2g: Threshold; a per-code average ``>= sla_2g`` counts as meeting
            the SLA.

    Returns:
        ``(summary_df, site_pivot)``, or ``(None, None)`` when the
        ``2g_tch_avail`` or ``period`` column is absent or no row survives the
        filter.

        * ``summary_df`` has one row per period (``pre`` then ``post``) with
          count, mean, median, 5th/95th percentile, min, max and SLA counts.
          The ``cells*`` keys count distinct ``code`` groups.
        * ``site_pivot`` has one row per ``code`` with ``tch_avail_pre``,
          ``tch_avail_post``, ``tch_avail_diff`` (post minus pre) and the
          boolean ``pre_ok_vs_sla`` / ``post_ok_vs_sla`` flags.
    """
    avail_col = "2g_tch_avail"
    # Missing values are represented with float NaN rather than pd.NA: pd.NA
    # turns the column into object dtype, which DataFrame.round() (used by the
    # caller) silently skips.
    missing = float("nan")

    if avail_col not in df.columns or "period" not in df.columns:
        return None, None

    keep = df[avail_col].notna() & df["period"].isin(["pre", "post"])
    df_2g = df[keep]
    if df_2g.empty:
        return None, None

    site_pivot = (
        df_2g.groupby(["code", "period"])[avail_col]
        .mean()
        .unstack()
        .rename(columns={"pre": "tch_avail_pre", "post": "tch_avail_post"})
    )

    # A period with no data at all yields no column after unstack(); add it so
    # downstream code can rely on both columns being present.
    for col_name in ("tch_avail_pre", "tch_avail_post"):
        if col_name not in site_pivot.columns:
            site_pivot[col_name] = missing

    site_pivot["tch_avail_diff"] = (
        site_pivot["tch_avail_post"] - site_pivot["tch_avail_pre"]
    )
    # NaN >= sla_2g evaluates to False, so a code without data in a period is
    # flagged as not meeting the SLA for that period.
    site_pivot["pre_ok_vs_sla"] = site_pivot["tch_avail_pre"] >= sla_2g
    site_pivot["post_ok_vs_sla"] = site_pivot["tch_avail_post"] >= sla_2g

    site_pivot = site_pivot.reset_index()

    summary_rows = []
    for period_label, col_name in (
        ("pre", "tch_avail_pre"),
        ("post", "tch_avail_post"),
    ):
        series = site_pivot[col_name].dropna()
        total_cells = int(series.shape[0])

        if total_cells == 0:
            # Explicit empty row: avoids a 0/0 division for the percentage.
            summary_rows.append(
                {
                    "period": period_label,
                    "cells": 0,
                    "avg_availability": missing,
                    "median_availability": missing,
                    "p05_availability": missing,
                    "p95_availability": missing,
                    "min_availability": missing,
                    "max_availability": missing,
                    "cells_ge_sla": 0,
                    "cells_lt_sla": 0,
                    "pct_cells_ge_sla": missing,
                }
            )
            continue

        cells_ge_sla = int((series >= sla_2g).sum())
        cells_lt_sla = int((series < sla_2g).sum())
        summary_rows.append(
            {
                "period": period_label,
                "cells": total_cells,
                "avg_availability": series.mean(),
                "median_availability": series.median(),
                "p05_availability": series.quantile(0.05),
                "p95_availability": series.quantile(0.95),
                "min_availability": series.min(),
                "max_availability": series.max(),
                "cells_ge_sla": cells_ge_sla,
                "cells_lt_sla": cells_lt_sla,
                "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
            }
        )

    summary_df = pd.DataFrame(summary_rows)

    return summary_df, site_pivot
367
+
368
+
369
  def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
370
  df["date"] = pd.to_datetime(df["date"])
371
 
 
439
  "Number of top traffic sites", value=25
440
  )
441
 
442
+ sla_2g_col, _ = st.columns(2)
443
+ with sla_2g_col:
444
+ sla_2g = st.number_input("2G TCH availability SLA (%)", value=98.0)
445
+
446
  if len(pre_range) != 2 or len(post_range) != 2:
447
  st.warning("⚠️ Please select 2 dates for each period (pre and post).")
448
  st.stop()
 
457
 
458
  # Warning if pre and post are overlapping
459
  if pre_range[0] < post_range[0] and pre_range[1] > post_range[1]:
460
+ st.warning(" Pre and post periode are overlapping.")
461
  st.stop()
462
 
463
+ if st.button(" Run Analysis"):
464
 
465
  df_2g = read_uploaded_file(two_g_file)
466
  df_3g = read_uploaded_file(three_g_file)
 
486
  full_df["week"] = full_df["date"].dt.isocalendar().week
487
  full_df["year"] = full_df["date"].dt.isocalendar().year
488
 
489
+ # Display Summary
490
+ st.success(" Analysis completed")
491
+ st.subheader(" Summary Analysis Pre / Post")
492
  st.dataframe(sum_pre_post_analysis)
493
+
494
+ summary_2g_avail, site_2g_avail = analyze_2g_availability(full_df, sla_2g)
495
+ if summary_2g_avail is not None:
496
+ st.subheader("2G - TCH Availability vs SLA")
497
+ st.write(f"SLA target 2G TCH availability: {sla_2g}%")
498
+ st.dataframe(summary_2g_avail.round(2))
499
+
500
+ st.subheader("2G - TCH Availability by site (worst 25 by post-period)")
501
+ worst_sites_2g = site_2g_avail.sort_values("tch_avail_post").head(25)
502
+ st.dataframe(worst_sites_2g.round(2))
503
+ else:
504
+ st.info(
505
+ "2G TCH availability KPI not found in input report or no data for selected periods."
506
+ )
507
+
508
  TraficAnalysis.last_period_df = last_period
509
 
510
+ #######################################################################################################"""
511
 
512
  #######################################################################################################
513
  if TraficAnalysis.last_period_df is not None: