Add 2G TCH availability analysis with SLA comparison and site-level reporting
Browse files
apps/kpi_analysis/trafic_analysis.py
CHANGED
|
@@ -3,6 +3,7 @@ import zipfile
|
|
| 3 |
from datetime import datetime
|
| 4 |
from pathlib import Path
|
| 5 |
|
|
|
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
| 8 |
import streamlit as st
|
|
@@ -54,20 +55,37 @@ def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 54 |
df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
|
| 55 |
df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
|
| 56 |
df["code"] = df["BCF name"].apply(extract_code)
|
|
|
|
|
|
|
|
|
|
| 57 |
date_format = (
|
| 58 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 59 |
)
|
| 60 |
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
|
| 61 |
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
return df
|
| 66 |
|
| 67 |
|
| 68 |
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
|
| 69 |
df = df[df["WBTS name"].str.len() >= 10].copy()
|
| 70 |
df["code"] = df["WBTS name"].apply(extract_code)
|
|
|
|
|
|
|
|
|
|
| 71 |
date_format = (
|
| 72 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 73 |
)
|
|
@@ -93,6 +111,9 @@ def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 93 |
+ df["4G/LTE UL Traffic Volume (GBytes)"]
|
| 94 |
)
|
| 95 |
df["code"] = df["LNBTS name"].apply(extract_code)
|
|
|
|
|
|
|
|
|
|
| 96 |
date_format = (
|
| 97 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 98 |
)
|
|
@@ -128,8 +149,15 @@ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_r
|
|
| 128 |
if col not in df:
|
| 129 |
df[col] = 0
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
df.fillna(0, inplace=True)
|
| 132 |
|
|
|
|
|
|
|
|
|
|
| 133 |
df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
|
| 134 |
df["total_data_trafic"] = (
|
| 135 |
df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
|
|
@@ -260,6 +288,84 @@ def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_r
|
|
| 260 |
return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
|
| 261 |
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
|
| 264 |
df["date"] = pd.to_datetime(df["date"])
|
| 265 |
|
|
@@ -333,6 +439,10 @@ with number_of_top_trafic_sites_col:
|
|
| 333 |
"Number of top traffic sites", value=25
|
| 334 |
)
|
| 335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
if len(pre_range) != 2 or len(post_range) != 2:
|
| 337 |
st.warning("⚠️ Please select 2 dates for each period (pre and post).")
|
| 338 |
st.stop()
|
|
@@ -347,10 +457,10 @@ if pre_range == post_range:
|
|
| 347 |
|
| 348 |
# Warning if pre and post are overlapping
|
| 349 |
if pre_range[0] < post_range[0] and pre_range[1] > post_range[1]:
|
| 350 |
-
st.warning("
|
| 351 |
st.stop()
|
| 352 |
|
| 353 |
-
if st.button("
|
| 354 |
|
| 355 |
df_2g = read_uploaded_file(two_g_file)
|
| 356 |
df_3g = read_uploaded_file(three_g_file)
|
|
@@ -376,13 +486,28 @@ if st.button("🔍 Run Analysis"):
|
|
| 376 |
full_df["week"] = full_df["date"].dt.isocalendar().week
|
| 377 |
full_df["year"] = full_df["date"].dt.isocalendar().year
|
| 378 |
|
| 379 |
-
#
|
| 380 |
-
st.success("
|
| 381 |
-
st.subheader("
|
| 382 |
st.dataframe(sum_pre_post_analysis)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
TraficAnalysis.last_period_df = last_period
|
| 384 |
|
| 385 |
-
#######################################################################################################""
|
| 386 |
|
| 387 |
#######################################################################################################
|
| 388 |
if TraficAnalysis.last_period_df is not None:
|
|
|
|
| 3 |
from datetime import datetime
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
import pandas as pd
|
| 8 |
import plotly.express as px
|
| 9 |
import streamlit as st
|
|
|
|
| 55 |
df["2g_data_trafic"] = ((df["TRAFFIC_PS DL"] + df["PS_UL_Load"]) / 1000).round(1)
|
| 56 |
df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
|
| 57 |
df["code"] = df["BCF name"].apply(extract_code)
|
| 58 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 59 |
+
df = df[df["code"].notna()]
|
| 60 |
+
df["code"] = df["code"].astype(int)
|
| 61 |
date_format = (
|
| 62 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 63 |
)
|
| 64 |
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format=date_format)
|
| 65 |
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
|
| 66 |
+
|
| 67 |
+
if "TCH availability ratio" in df.columns:
|
| 68 |
+
df["2g_tch_avail"] = pd.to_numeric(
|
| 69 |
+
df["TCH availability ratio"], errors="coerce"
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
agg_dict = {
|
| 73 |
+
"2g_data_trafic": "sum",
|
| 74 |
+
"2g_voice_trafic": "sum",
|
| 75 |
+
}
|
| 76 |
+
if "2g_tch_avail" in df.columns:
|
| 77 |
+
agg_dict["2g_tch_avail"] = "mean"
|
| 78 |
+
|
| 79 |
+
df = df.groupby(["date", "ID", "code"], as_index=False).agg(agg_dict)
|
| 80 |
return df
|
| 81 |
|
| 82 |
|
| 83 |
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
|
| 84 |
df = df[df["WBTS name"].str.len() >= 10].copy()
|
| 85 |
df["code"] = df["WBTS name"].apply(extract_code)
|
| 86 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 87 |
+
df = df[df["code"].notna()]
|
| 88 |
+
df["code"] = df["code"].astype(int)
|
| 89 |
date_format = (
|
| 90 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 91 |
)
|
|
|
|
| 111 |
+ df["4G/LTE UL Traffic Volume (GBytes)"]
|
| 112 |
)
|
| 113 |
df["code"] = df["LNBTS name"].apply(extract_code)
|
| 114 |
+
df["code"] = pd.to_numeric(df["code"], errors="coerce")
|
| 115 |
+
df = df[df["code"].notna()]
|
| 116 |
+
df["code"] = df["code"].astype(int)
|
| 117 |
date_format = (
|
| 118 |
"%m.%d.%Y %H:%M:%S" if len(df["PERIOD_START_TIME"].iat[0]) > 10 else "%m.%d.%Y"
|
| 119 |
)
|
|
|
|
| 149 |
if col not in df:
|
| 150 |
df[col] = 0
|
| 151 |
|
| 152 |
+
tch_mask = None
|
| 153 |
+
if "2g_tch_avail" in df.columns:
|
| 154 |
+
tch_mask = df["2g_tch_avail"].notna()
|
| 155 |
+
|
| 156 |
df.fillna(0, inplace=True)
|
| 157 |
|
| 158 |
+
if tch_mask is not None:
|
| 159 |
+
df.loc[~tch_mask, "2g_tch_avail"] = np.nan
|
| 160 |
+
|
| 161 |
df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
|
| 162 |
df["total_data_trafic"] = (
|
| 163 |
df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
|
|
|
|
| 288 |
return df, last_period, sum_pivot.round(2), avg_pivot.round(2)
|
| 289 |
|
| 290 |
|
| 291 |
+
def analyze_2g_availability(df: pd.DataFrame, sla_2g: float):
    """Compare per-site 2G TCH availability between the pre and post periods.

    Rows of ``df`` with a non-null ``2g_tch_avail`` value and a ``period`` of
    ``"pre"`` or ``"post"`` are averaged per ``code`` and period, then each
    site average is compared against ``sla_2g``.

    Args:
        df: Frame holding at least the ``code``, ``period`` and
            ``2g_tch_avail`` columns. It is not modified.
        sla_2g: Availability threshold; a site meets the SLA when its average
            is greater than or equal to this value.

    Returns:
        ``(summary_df, site_pivot)``, or ``(None, None)`` when ``df`` lacks the
        ``2g_tch_avail`` or ``period`` column, or no row survives the filter.

        ``site_pivot`` has one row per ``code`` with ``tch_avail_pre``,
        ``tch_avail_post``, ``tch_avail_diff`` (post minus pre) and the boolean
        ``pre_ok_vs_sla`` / ``post_ok_vs_sla`` flags. A site with no data in a
        period gets NaN for that period and ``False`` for the matching flag.

        ``summary_df`` has one row per period with distribution statistics of
        the per-site averages and the count/percentage of sites at or above
        the SLA (the ``cells*`` columns count rows of ``site_pivot``).
    """
    avail_col = "2g_tch_avail"
    # Float NaN (not pd.NA) keeps every availability column float64, so the
    # arithmetic below stays numeric and DataFrame.round() in the caller
    # actually rounds these columns instead of skipping object columns.
    missing = float("nan")

    if avail_col not in df.columns or "period" not in df.columns:
        return None, None

    usable = df[avail_col].notna() & df["period"].isin(["pre", "post"])
    df_2g = df[usable]
    if df_2g.empty:
        return None, None

    # One row per site, one column per period that actually has data.
    site_pivot = (
        df_2g.groupby(["code", "period"])[avail_col]
        .mean()
        .unstack()
        .rename(columns={"pre": "tch_avail_pre", "post": "tch_avail_post"})
    )

    # A period with no data at all produces no column; add it as all-NaN.
    for col_name in ("tch_avail_pre", "tch_avail_post"):
        if col_name not in site_pivot.columns:
            site_pivot[col_name] = missing

    site_pivot["tch_avail_diff"] = (
        site_pivot["tch_avail_post"] - site_pivot["tch_avail_pre"]
    )
    # NaN >= sla evaluates to False, so sites without data are flagged not-OK.
    site_pivot["pre_ok_vs_sla"] = site_pivot["tch_avail_pre"] >= sla_2g
    site_pivot["post_ok_vs_sla"] = site_pivot["tch_avail_post"] >= sla_2g

    site_pivot = site_pivot.reset_index()

    summary_rows = []
    for period_label, col_name in (
        ("pre", "tch_avail_pre"),
        ("post", "tch_avail_post"),
    ):
        series = site_pivot[col_name].dropna()
        total_cells = int(series.shape[0])

        if total_cells == 0:
            # No site has data for this period: emit an explicit empty row.
            summary_rows.append(
                {
                    "period": period_label,
                    "cells": 0,
                    "avg_availability": missing,
                    "median_availability": missing,
                    "p05_availability": missing,
                    "p95_availability": missing,
                    "min_availability": missing,
                    "max_availability": missing,
                    "cells_ge_sla": 0,
                    "cells_lt_sla": 0,
                    "pct_cells_ge_sla": missing,
                }
            )
            continue

        cells_ge_sla = int((series >= sla_2g).sum())
        cells_lt_sla = int((series < sla_2g).sum())
        summary_rows.append(
            {
                "period": period_label,
                "cells": total_cells,
                "avg_availability": series.mean(),
                "median_availability": series.median(),
                "p05_availability": series.quantile(0.05),
                "p95_availability": series.quantile(0.95),
                "min_availability": series.min(),
                "max_availability": series.max(),
                "cells_ge_sla": cells_ge_sla,
                "cells_lt_sla": cells_lt_sla,
                "pct_cells_ge_sla": cells_ge_sla / total_cells * 100,
            }
        )

    summary_df = pd.DataFrame(summary_rows)

    return summary_df, site_pivot
|
| 367 |
+
|
| 368 |
+
|
| 369 |
def monthly_data_analysis(df: pd.DataFrame) -> pd.DataFrame:
|
| 370 |
df["date"] = pd.to_datetime(df["date"])
|
| 371 |
|
|
|
|
| 439 |
"Number of top traffic sites", value=25
|
| 440 |
)
|
| 441 |
|
| 442 |
+
sla_2g_col, _ = st.columns(2)
|
| 443 |
+
with sla_2g_col:
|
| 444 |
+
sla_2g = st.number_input("2G TCH availability SLA (%)", value=98.0)
|
| 445 |
+
|
| 446 |
if len(pre_range) != 2 or len(post_range) != 2:
|
| 447 |
st.warning("⚠️ Please select 2 dates for each period (pre and post).")
|
| 448 |
st.stop()
|
|
|
|
| 457 |
|
| 458 |
# Warning if pre and post are overlapping
|
| 459 |
if pre_range[0] < post_range[0] and pre_range[1] > post_range[1]:
|
| 460 |
+
st.warning(" Pre and post periode are overlapping.")
|
| 461 |
st.stop()
|
| 462 |
|
| 463 |
+
if st.button(" Run Analysis"):
|
| 464 |
|
| 465 |
df_2g = read_uploaded_file(two_g_file)
|
| 466 |
df_3g = read_uploaded_file(three_g_file)
|
|
|
|
| 486 |
full_df["week"] = full_df["date"].dt.isocalendar().week
|
| 487 |
full_df["year"] = full_df["date"].dt.isocalendar().year
|
| 488 |
|
| 489 |
+
# Display Summary
|
| 490 |
+
st.success(" Analysis completed")
|
| 491 |
+
st.subheader(" Summary Analysis Pre / Post")
|
| 492 |
st.dataframe(sum_pre_post_analysis)
|
| 493 |
+
|
| 494 |
+
summary_2g_avail, site_2g_avail = analyze_2g_availability(full_df, sla_2g)
|
| 495 |
+
if summary_2g_avail is not None:
|
| 496 |
+
st.subheader("2G - TCH Availability vs SLA")
|
| 497 |
+
st.write(f"SLA target 2G TCH availability: {sla_2g}%")
|
| 498 |
+
st.dataframe(summary_2g_avail.round(2))
|
| 499 |
+
|
| 500 |
+
st.subheader("2G - TCH Availability by site (worst 25 by post-period)")
|
| 501 |
+
worst_sites_2g = site_2g_avail.sort_values("tch_avail_post").head(25)
|
| 502 |
+
st.dataframe(worst_sites_2g.round(2))
|
| 503 |
+
else:
|
| 504 |
+
st.info(
|
| 505 |
+
"2G TCH availability KPI not found in input report or no data for selected periods."
|
| 506 |
+
)
|
| 507 |
+
|
| 508 |
TraficAnalysis.last_period_df = last_period
|
| 509 |
|
| 510 |
+
#######################################################################################################"""
|
| 511 |
|
| 512 |
#######################################################################################################
|
| 513 |
if TraficAnalysis.last_period_df is not None:
|