| | |
| | import pandas as pd |
| |
|
| | |
# Base folder for this analysis; replace the placeholder with the real location.
file_path = '<YOUR_DATA_PATH>/'
# Sub-folder of file_path that the input CSV files are read from.
input_file_path = f"{file_path}data_for_model_e_columns/"
| |
|
| |
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename of the CSV to read
    :return: dataframe holding the contents of the file
    """
    return pd.read_csv(file)
| |
|
| |
|
def format_data(data, IDs, onboard):
    """
    Convert datetime columns to datetime format, remove additional columns,
    filter to only include RECEIVER and scale up IDs, and join onboarding dates to admissions data
    --------
    Only 'Study_ID' and 'admitted_1' are kept from data, and only 'Study_ID' and
    'OB_date' from onboard. Both date columns are parsed as UTC datetimes. The two
    tables are left-joined onto IDs by 'Study_ID', so IDs without a matching
    admission or onboarding record are kept with a missing (NaT) date.
    The input dataframes are not modified.

    :param data: dataframe with 'Study_ID' and 'admitted_1' columns
    :param IDs: dataframe containing RC and SU1 study IDs (needs a 'Study_ID' column)
    :param onboard: dataframe containing onboarding dates ('Study_ID' and 'OB_date' columns)
    :return: new dataframe with the columns of IDs plus 'admitted_1' and 'OB_date'
    """
    # Select the needed columns first and convert through assign(): both steps
    # produce new frames, so the caller's dataframes keep their original values.
    admissions = data[['Study_ID', 'admitted_1']].assign(
        admitted_1=pd.to_datetime(data['admitted_1'], utc=True)
    )
    onboarding = onboard[['Study_ID', 'OB_date']].assign(
        OB_date=pd.to_datetime(onboard['OB_date'], utc=True)
    )
    # Left joins keep every row of IDs, which is what restricts the result to
    # the study IDs of interest.
    merged = pd.merge(IDs, admissions, on="Study_ID", how="left")
    return pd.merge(merged, onboarding, on="Study_ID", how="left")
| |
|
| |
|
def time_to_admission(data, date_of_admission, OB_date, output_path=None):
    """
    Calculate days from onboarding to first admission for those who had an admission in the study period
    --------
    Adds a 'days' column to data (in place) holding the whole number of days
    between the onboarding date and the first admission date, then writes the
    dataframe, including its index, to a CSV file. Rows where either date is
    missing get a missing 'days' value.

    :param data: dataframe containing onboarding and admissions data
    :param date_of_admission: name of the datetime column showing date of first admission
    :param OB_date: name of the datetime column showing onboarding dates
    :param output_path: CSV file to write; when omitted the file
        'Days_to_first_admission.csv' is written under the module-level file_path
    :return: the same dataframe, with the additional 'days' column
    """
    if output_path is None:
        # Resolved at call time so the default follows the module-level setting.
        output_path = file_path + 'Days_to_first_admission.csv'
    # Use the column names supplied by the caller rather than fixed names.
    data['days'] = (data[date_of_admission] - data[OB_date]).dt.days
    data.to_csv(output_path)
    return data
| |
|
| |
|
def main():
    """
    Run the whole analysis: read the three input CSV files, join them into one
    table and write the days-to-first-admission output
    """
    # Order matters: admissions data, onboarding dates, then the study IDs.
    input_names = (
        "admissions_data_up_to_31082021.csv",
        "onboarding_dates.csv",
        "RC_SU1_IDs.csv",
    )
    admissions_data, onboard, RC_SU1_IDs = (
        read_data(input_file_path + name) for name in input_names
    )

    # Join admissions and onboarding dates onto the study IDs, then compute
    # and save the days from onboarding to first admission.
    time_to_admission(
        format_data(admissions_data, RC_SU1_IDs, onboard),
        'admitted_1',
        'OB_date',
    )
| |
|
| |
|
# Run the pipeline only when executed as a script, so importing this module
# (e.g. to reuse its functions) does not read or write any files.
if __name__ == "__main__":
    main()