| | |
| | import pandas as pd |
| |
|
| | |
# Root directory for all inputs and outputs; replace the placeholder with the
# real location (the trailing slash is required because paths are built by
# plain string concatenation).
file_path = '<YOUR_DATA_PATH>/'
# Sub-directory holding the wrangled input CSV files.
input_file_path = f"{file_path}data_for_model_e_columns/"
| |
|
| |
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe with the file contents, parsed with read_csv defaults
    """
    return pd.read_csv(file)
| |
|
| |
|
def format_data(data, IDs, onboard):
    """
    Keep the NIV columns used downstream, parse the timestamps needed for
    censoring, restrict to the supplied Study IDs and join onboarding dates
    --------
    :param data: NIV dataframe; must contain 'Study_ID', 'Stop_time',
        'Start_time' and the ie_ratio / resp_events columns selected below
    :param IDs: dataframe containing Study IDs (column 'Study_ID'); only
        these IDs appear in the result
    :param onboard: dataframe containing onboarding dates (columns
        'Study_ID' and 'OB_date'); it is not modified by this function
    :return: formatted dataframe: one row per NIV record for each ID in
        ``IDs`` (IDs without NIV records are kept with missing values),
        with 'OB_date' and 'yearcensor' (OB_date + 365 days) attached
    """
    niv_columns = ['Study_ID', 'ie_ratio_value_50', 'ie_ratio_value_95',
                   'ie_ratio_maximum_value', 'resp_events_AHI',
                   'resp_events_HI', 'Stop_time', 'Start_time']
    # .copy() makes the subset an independent frame, so the column assignment
    # below cannot raise SettingWithCopyWarning or write into the caller's data.
    data = data[niv_columns].copy()
    # Only 'Stop_time' is parsed; 'Start_time' is carried through unchanged.
    data['Stop_time'] = pd.to_datetime(data['Stop_time'])

    # Work on a copy so the caller's onboarding frame is left untouched.
    onboard = onboard.copy()
    onboard['OB_date'] = pd.to_datetime(onboard['OB_date'])
    # End of the first year after onboarding (fixed 365-day window).
    onboard['yearcensor'] = onboard['OB_date'] + pd.offsets.DateOffset(days=365)

    # Left joins starting from IDs: records for IDs outside the list are
    # dropped, and listed IDs with no records/onboarding date are retained.
    data = pd.merge(IDs, data, on="Study_ID", how="left")
    data = pd.merge(data, onboard, on="Study_ID", how="left")
    return data
| |
|
| |
|
def filter_study_censor(data, censor_date='2021-09-01'):
    """
    Filter the dataframe to only contain data obtained before the study censor date
    --------
    :param data: dataframe with a 'Stop_time' column
    :param censor_date: exclusive upper bound compared against 'Stop_time';
        defaults to the study censor date '2021-09-01'
    :return: dataframe containing only rows whose 'Stop_time' is strictly
        before ``censor_date``
    """
    # Strict '<': a record stopping exactly at the censor date is excluded.
    before_censor = data['Stop_time'] < censor_date
    return data[before_censor]
| |
|
| |
|
def filter_first_year(data):
    """
    Keep only the rows recorded within the first year after onboarding
    --------
    :param data: dataframe with 'Stop_time' and 'yearcensor' columns
    :return: dataframe containing only rows whose 'Stop_time' is on or before
        that row's 'yearcensor' value
    """
    # Row-wise comparison; the boundary itself (Stop_time == yearcensor) is kept.
    within_first_year = data['Stop_time'] <= data['yearcensor']
    return data.loc[within_first_year]
| |
|
| |
|
def mean_max_summary(data, col):
    """
    Summarise the NIV parameters per group
    --------
    :param data: dataframe containing the ie_ratio and resp_events columns
    :param col: parameter to group on
    :return: summary dataframe indexed by group, with the mean, max and count
        of each NIV parameter (two-level columns: parameter, statistic)
    """
    niv_parameters = (
        'ie_ratio_value_50',
        'ie_ratio_value_95',
        'ie_ratio_maximum_value',
        'resp_events_AHI',
        'resp_events_HI',
    )
    # Every parameter gets the same set of statistics, in this order.
    aggregation_spec = dict.fromkeys(niv_parameters, ['mean', 'max', 'count'])
    grouped = data.groupby(col)
    return grouped.agg(aggregation_spec)
| |
|
| |
|
def calculate_summary_data(data):
    """
    Summarise the NIV parameters per study ID for two windows - up to the
    study censor date, and up to one year after onboarding - and save each
    summary as a csv file under ``file_path``
    --------
    :param data: formatted dataframe containing 'Study_ID', 'Stop_time',
        'yearcensor' and the NIV parameter columns
    """
    # Both summaries are computed before anything is written to disk.
    summaries = {
        'censor.csv': mean_max_summary(filter_study_censor(data), 'Study_ID'),
        'year.csv': mean_max_summary(filter_first_year(data), 'Study_ID'),
    }

    output_prefix = file_path + 'NIV_ Average_parameters_to_'
    for suffix, summary in summaries.items():
        summary.to_csv(output_prefix + suffix)
| |
|
| |
|
def main():
    """
    Run the pipeline: read the three input CSV files, format the NIV data
    and write the summary CSV files
    """
    # Files are read in this order: NIV records, onboarding dates, study IDs.
    input_names = (
        "NIV_data_wrangled.csv",
        "onboarding_dates.csv",
        "RC_SU1_IDs.csv",
    )
    raw_niv, onboarding, study_ids = (
        read_data(input_file_path + name) for name in input_names
    )

    # Restrict to the listed study IDs and attach onboarding dates.
    formatted = format_data(raw_niv, study_ids, onboarding)

    # Write the per-ID summaries to disk.
    calculate_summary_data(formatted)
| |
|
| |
|
# Run the pipeline only when executed as a script, so that importing this
# module (e.g. to reuse its helper functions) does not read or write files.
if __name__ == "__main__":
    main()