| import pandas as pd |
|
|
| from utils.kpi_analysis_utils import ( |
| cell_availability_analysis, |
| combine_comments, |
| create_daily_date, |
| create_dfs_per_kpi, |
| kpi_naming_cleaning, |
| ) |
|
|
|
|
| class WbtsCapacity: |
| final_results: pd.DataFrame = None |
|
|
|
|
| def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str: |
| """ |
| Check if any value in the row deviates more than max_diff from the most common value. |
| |
| Args: |
| row: Series of values to check for deviation |
| max_diff: Maximum allowed difference from the most common value |
| type: Type identifier for the deviation message |
| |
| Returns: |
| A message indicating deviation if found, otherwise an empty string |
| """ |
| numeric_row = row.astype(float) |
| mode_series = numeric_row.mode() |
|
|
| |
| most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0] |
|
|
| diffs = abs(numeric_row - most_common) |
|
|
| if (diffs > max_diff).any(): |
| return f"{type} Deviation > {max_diff} detected" |
| else: |
| return "" |
|
|
|
|
| def max_used_bb_subunits_analysis( |
| df: pd.DataFrame, |
| days: int = 7, |
| threshold: int = 80, |
| number_of_threshold_days: int = 3, |
| ) -> pd.DataFrame: |
| """ |
| Analyze maximum used baseband subunits and identify sites needing upgrades. |
| |
| Args: |
| df: DataFrame containing baseband utilization data |
| days: Number of days to analyze |
| threshold: Utilization threshold percentage for flagging |
| number_of_threshold_days: Minimum days above threshold to flag for upgrade |
| |
| Returns: |
| DataFrame with analysis results and upgrade recommendations |
| """ |
| result_df = df.copy() |
| last_days_df = result_df.iloc[:, -days:] |
| last_days_df = last_days_df.fillna(0) |
|
|
| result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2) |
| |
| result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply( |
| lambda row: sum(1 for x in row if x >= threshold), axis=1 |
| ) |
|
|
| |
| result_df["Average_used_bb_ratio_comment"] = "" |
|
|
| |
| result_df.loc[ |
| (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days) |
| & (result_df["Average_used_bb_ratio"] >= threshold), |
| "Average_used_bb_ratio_comment", |
| ] = "need BB upgrade" |
|
|
| return result_df |
|
|
|
|
| def max_used_ce_analysis( |
| df: pd.DataFrame, |
| days: int = 7, |
| threshold: int = 80, |
| number_of_threshold_days: int = 3, |
| ) -> pd.DataFrame: |
| """ |
| Analyze maximum used channel elements and identify sites needing upgrades. |
| |
| Args: |
| df: DataFrame containing channel element utilization data |
| days: Number of days to analyze |
| threshold: Utilization threshold percentage for flagging |
| number_of_threshold_days: Minimum days above threshold to flag for upgrade |
| |
| Returns: |
| DataFrame with analysis results and upgrade recommendations |
| """ |
| result_df = df.copy().fillna(0) |
| last_days_df = result_df.iloc[:, -days:] |
|
|
| result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2) |
|
|
| |
| result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply( |
| lambda row: sum(1 for x in row if x >= threshold), axis=1 |
| ) |
|
|
| |
| result_df["Average_used_ce_ratio_comment"] = "" |
|
|
| |
| result_df.loc[ |
| (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days) |
| & (result_df["Average_used_ce_ratio"] >= threshold), |
| "Average_used_ce_ratio_comment", |
| ] = "need CE upgrade" |
|
|
| return result_df |
|
|
|
|
| def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame: |
| """ |
| Analyze baseband subunit count for deviations. |
| |
| Args: |
| df: DataFrame containing baseband subunit count data |
| days: Number of days to analyze |
| |
| Returns: |
| DataFrame with deviation analysis comments |
| """ |
| result_df = df.copy() |
| last_days_df = result_df.iloc[:, -days:] |
| result_df["num_bb_subunits_comment"] = last_days_df.apply( |
| lambda row: check_deviation(row, type="bb"), axis=1 |
| ) |
| return result_df |
|
|
|
|
| def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame: |
| """ |
| Analyze available channel elements for deviations. |
| |
| Args: |
| df: DataFrame containing available channel element data |
| days: Number of days to analyze |
| |
| Returns: |
| DataFrame with deviation analysis comments |
| """ |
| result_df = df.copy() |
| last_days_df = result_df.iloc[:, -days:] |
| result_df["avail_ce_comment"] = last_days_df.apply( |
| lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1 |
| ) |
| return result_df |
|
|
|
|
| def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: |
| """ |
| Combine baseband related comments into a single column. |
| |
| Args: |
| df: DataFrame containing baseband comment columns |
| |
| Returns: |
| DataFrame with combined baseband comments |
| """ |
| return combine_comments( |
| df, |
| "num_bb_subunits_comment", |
| "Average_used_bb_ratio_comment", |
| "availability_comment_daily", |
| new_column="bb_comments", |
| ) |
|
|
|
|
| def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: |
| """ |
| Combine channel element related comments into a single column. |
| |
| Args: |
| df: DataFrame containing channel element comment columns |
| |
| Returns: |
| DataFrame with combined channel element comments |
| """ |
| return combine_comments( |
| df, |
| "avail_ce_comment", |
| "Average_used_ce_ratio_comment", |
| "availability_comment_daily", |
| new_column="ce_comments", |
| ) |
|
|
|
|
| def wbts_kpi_analysis( |
| df: pd.DataFrame, |
| num_days: int = 7, |
| threshold: int = 80, |
| number_of_threshold_days: int = 3, |
| ) -> pd.DataFrame: |
| """ |
| Create pivoted DataFrames for each KPI and perform analysis. |
| |
| Args: |
| df: DataFrame containing KPI data |
| num_days: Number of days to analyze |
| threshold: Utilization threshold percentage for flagging |
| number_of_threshold_days: Minimum days above threshold to flag for upgrade |
| |
| Returns: |
| DataFrame with combined analysis results |
| """ |
| |
| pivoted_kpi_dfs = {} |
|
|
| pivoted_kpi_dfs = create_dfs_per_kpi( |
| df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5 |
| ) |
|
|
| |
| wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0] |
| licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"] |
| max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"] |
| max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"] |
| max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"] |
| max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"] |
| num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"] |
| max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"] |
| cell_availability_df = pivoted_kpi_dfs[ |
| "Cell_Availability_excluding_blocked_by_user_state_BLU" |
| ] |
| total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"] |
| total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"] |
| max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"] |
|
|
| |
| max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis( |
| max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days |
| ) |
| cell_availability_df = cell_availability_analysis(cell_availability_df, num_days) |
| max_used_ce_ratio_flexi_df = max_used_ce_analysis( |
| max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days |
| ) |
| num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days) |
| licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days) |
|
|
| |
| result_df = pd.concat( |
| [ |
| wbts_name_df, |
| licensed_ce_df, |
| max_used_ce_dl_df, |
| max_used_ce_ul_df, |
| max_avail_ce_df, |
| max_used_bb_subunits_df, |
| num_bb_subunits_df, |
| max_bb_sus_util_ratio_df, |
| cell_availability_df, |
| total_cs_traffic_df, |
| total_data_traffic_df, |
| max_used_ce_ratio_flexi_df, |
| ], |
| axis=1, |
| ) |
|
|
| |
| result_df = bb_comments_analysis(result_df) |
| result_df = ce_comments_analysis(result_df) |
|
|
| return result_df |
|
|
|
|
| def load_data( |
| filepath: str, |
| num_days: int, |
| threshold: int, |
| number_of_threshold_days: int, |
| ) -> pd.DataFrame: |
| """ |
| Load data from CSV file and perform preprocessing and analysis. |
| |
| Args: |
| filepath: Path to CSV file or uploaded file object |
| num_days: Number of days to analyze |
| threshold: Utilization threshold percentage for flagging |
| number_of_threshold_days: Minimum days above threshold to flag for upgrade |
| |
| Returns: |
| DataFrame with processed and analyzed data |
| """ |
| df = pd.read_csv(filepath, delimiter=";") |
|
|
| |
| df = create_daily_date(df) |
| df = kpi_naming_cleaning(df) |
|
|
| |
| df = df[["date"] + [col for col in df.columns if col not in ["date"]]] |
| df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]] |
|
|
| |
| df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days) |
|
|
| |
| |
|
|
| return df |
|
|