# Import libraries
import pandas as pd

# Set file paths
file_path = '/'
input_file_path = file_path + 'data_for_model_e_columns/'

# NIV parameters that are summarised per study ID
SUMMARY_COLUMNS = ['ie_ratio_value_50', 'ie_ratio_value_95',
                   'ie_ratio_maximum_value', 'resp_events_AHI',
                   'resp_events_HI']

# Columns of the raw NIV data that are kept by format_data
NIV_COLUMNS = ['Study_ID'] + SUMMARY_COLUMNS + ['Stop_time', 'Start_time']

# Default study censor date (rows must stop strictly before this date)
STUDY_CENSOR_DATE = '2021-09-01'


def read_data(file):
    """
    Read in data source
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe
    """
    return pd.read_csv(file)


def format_data(data, IDs, onboard):
    """
    Keep only the NIV columns of interest, convert 'Stop_time' and
    'OB_date' to datetime, add a 'yearcensor' column (onboarding date
    + 365 days), restrict to the study IDs listed in IDs and join the
    onboarding dates. The input dataframes are not modified.
    --------
    :param data: NIV dataframe
    :param IDs: dataframe containing Study IDs
    :param onboard: dataframe containing onboarding dates ('Study_ID', 'OB_date')
    :return: formatted dataframe
    """
    # Work on copies: assigning to a slice triggers SettingWithCopyWarning,
    # and the caller's onboarding dataframe must not be altered in place.
    niv = data[NIV_COLUMNS].copy()
    niv['Stop_time'] = pd.to_datetime(niv['Stop_time'])

    onboard = onboard.copy()
    onboard['OB_date'] = pd.to_datetime(onboard['OB_date'])
    onboard['yearcensor'] = onboard['OB_date'] + pd.offsets.DateOffset(days=365)

    # Left joins starting from IDs: every listed ID is kept, IDs without
    # NIV rows appear once with missing values.
    merged = pd.merge(IDs, niv, on="Study_ID", how="left")
    merged = pd.merge(merged, onboard, on="Study_ID", how="left")
    return merged


def filter_study_censor(data, censor_date=STUDY_CENSOR_DATE):
    """
    Filter the dataframe to only contain data obtained before the study
    censor date
    --------
    :param data: dataframe with a datetime 'Stop_time' column
    :param censor_date: censor date; rows are kept when 'Stop_time' is
        strictly earlier (default '2021-09-01')
    :return: dataframe containing data obtained before the study censor
        date
    """
    return data[data['Stop_time'] < censor_date]


def filter_first_year(data):
    """
    Filter the dataframe to only contain data obtained in the first year
    post-onboarding
    --------
    :param data: dataframe with 'Stop_time' and 'yearcensor' columns
    :return: dataframe containing only rows whose 'Stop_time' is on or
        before 'yearcensor'; rows where either date is missing are dropped
    """
    return data[data['yearcensor'] >= data['Stop_time']]


def mean_max_summary(data, col):
    """
    Create a dataframe showing mean, max and count per group for each
    summarised NIV parameter
    --------
    :param data: dataframe
    :param col: parameter to group on
    :return: summary dataframe indexed by col, with (parameter, metric)
        columns
    """
    summary_metrics = ['mean', 'max', 'count']
    return data.groupby(col).agg(
        {column: summary_metrics for column in SUMMARY_COLUMNS})


def calculate_summary_data(data):
    """
    Calculate the average NIV parameters up to the study censor date and
    a year after onboarding for each study ID and save the resulting
    summary dataframes as csv files
    --------
    :param data: formatted dataframe (as returned by format_data)
    """
    data_filter_censor = filter_study_censor(data)
    summary_censor = mean_max_summary(data_filter_censor, 'Study_ID')

    data_year_censor = filter_first_year(data)
    summary_year = mean_max_summary(data_year_censor, 'Study_ID')

    output_file_path = file_path + 'NIV_ Average_parameters_to_'
    summary_censor.to_csv(output_file_path + 'censor.csv')
    summary_year.to_csv(output_file_path + 'year.csv')


def main():
    # Read data
    niv_data_file = input_file_path + "NIV_data_wrangled.csv"
    onboard_file = input_file_path + "onboarding_dates.csv"
    rc_su1_ids_file = input_file_path + "RC_SU1_IDs.csv"
    niv_data = read_data(niv_data_file)
    onboard = read_data(onboard_file)
    rc_su1_ids = read_data(rc_su1_ids_file)

    # Format data
    niv_data = format_data(niv_data, rc_su1_ids, onboard)

    # Calculate and save summary NIV data to year and study censor dates
    # for each ID
    calculate_summary_data(niv_data)


# Only run the pipeline when executed as a script, so that importing
# this module does not trigger file I/O.
if __name__ == "__main__":
    main()