# File size: 3,732 Bytes (53a6def)
# Import libraries
import pandas as pd
# Set file paths: the base data directory, and the input sub-directory beneath it
file_path = '<YOUR_DATA_PATH>/'
input_file_path = f'{file_path}data_for_model_e_columns/'
def read_data(file):
    """
    Load a csv data source into a pandas dataframe
    --------
    :param file: string filename (passed straight to pandas.read_csv)
    :return: dataframe
    """
    return pd.read_csv(file)
def format_data(data, IDs, onboard):
    """
    Keep the NIV columns of interest, convert the dates used for censoring to
    datetime format, restrict to the supplied study IDs and join onboarding dates.

    Neither ``data`` nor ``onboard`` is modified; all conversions are done on copies.
    --------
    :param data: NIV dataframe
    :param IDs: dataframe containing Study IDs
    :param onboard: dataframe containing onboarding dates (needs 'Study_ID' and 'OB_date')
    :return: formatted dataframe with one row per NIV record of each ID in ``IDs``
             (IDs without NIV records are kept, with missing values), plus the
             onboarding columns and a 'yearcensor' column (OB_date + 365 days)
    """
    niv_columns = ['Study_ID', 'ie_ratio_value_50', 'ie_ratio_value_95',
                   'ie_ratio_maximum_value', 'resp_events_AHI',
                   'resp_events_HI', 'Stop_time', 'Start_time']
    # Copy so the assignment below acts on an independent frame rather than on
    # a slice of the caller's dataframe (avoids chained-assignment problems)
    niv = data[niv_columns].copy()
    # Only 'Stop_time' is converted; 'Start_time' is carried through as read
    niv['Stop_time'] = pd.to_datetime(niv['Stop_time'])

    # Copy so the caller's onboarding dataframe is not altered as a side effect
    onboard_dates = onboard.copy()
    onboard_dates['OB_date'] = pd.to_datetime(onboard_dates['OB_date'])
    onboard_dates['yearcensor'] = onboard_dates['OB_date'] + pd.offsets.DateOffset(days=365)

    # Left joins starting from IDs: every row of IDs is retained
    merged = pd.merge(IDs, niv, on="Study_ID", how="left")
    merged = pd.merge(merged, onboard_dates, on="Study_ID", how="left")
    return merged
def filter_study_censor(data, censor_date='2021-09-01'):
    """
    Filter the dataframe to only contain data obtained before the study censor date
    --------
    :param data: dataframe with a 'Stop_time' column
    :param censor_date: exclusive upper bound compared against 'Stop_time';
                        defaults to the study censor date '2021-09-01'
    :return: dataframe containing only rows whose 'Stop_time' is strictly before
             ``censor_date`` (rows where the comparison is False, e.g. a missing
             datetime 'Stop_time', are dropped)
    """
    before_censor = data['Stop_time'] < censor_date
    return data[before_censor]
def filter_first_year(data):
    """
    Keep only the rows recorded no later than the one-year-after-onboarding cut-off
    --------
    :param data: dataframe with 'Stop_time' and 'yearcensor' columns
    :return: dataframe containing only rows whose 'Stop_time' is on or before 'yearcensor'
    """
    within_year = data['Stop_time'] <= data['yearcensor']
    return data.loc[within_year]
def mean_max_summary(data, col):
    """
    Summarise the NIV parameters per group with their mean, max and count
    --------
    :param data: dataframe
    :param col: parameter to group on
    :return: summary dataframe indexed by group, with (parameter, statistic) columns
    """
    niv_parameters = ('ie_ratio_value_50',
                      'ie_ratio_value_95',
                      'ie_ratio_maximum_value',
                      'resp_events_AHI',
                      'resp_events_HI')
    # The same list of statistics is applied to every parameter
    aggregation = dict.fromkeys(niv_parameters, ['mean', 'max', 'count'])
    grouped = data.groupby(col)
    return grouped.agg(aggregation)
def calculate_summary_data(data):
    """
    Summarise the NIV parameters per study ID over two windows - up to the
    study censor date, and up to a year after onboarding - and save each
    summary dataframe as a csv file under the base data directory
    --------
    :param data: dataframe
    """
    # Build both summaries before anything is written to disk
    summaries = {
        'censor.csv': mean_max_summary(filter_study_censor(data), 'Study_ID'),
        'year.csv': mean_max_summary(filter_first_year(data), 'Study_ID'),
    }
    output_prefix = file_path + 'NIV_ Average_parameters_to_'
    for suffix, summary in summaries.items():
        summary.to_csv(output_prefix + suffix)
def main():
    """
    Run the pipeline: read the three input files, format the NIV data and
    save the per-ID summaries
    """
    # Read data (NIV records, onboarding dates, study IDs - in that order)
    niv_raw, onboarding, study_ids = (
        read_data(input_file_path + filename)
        for filename in ("NIV_data_wrangled.csv",
                         "onboarding_dates.csv",
                         "RC_SU1_IDs.csv")
    )

    # Format data
    niv_formatted = format_data(niv_raw, study_ids, onboarding)

    # Calculate and save summary NIV data to year and study censor dates for each ID
    calculate_summary_data(niv_formatted)
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not when imported
    main()