# copd-model-e/validation/parameter_calculation/Time_to_first_admission_calculations.py
# IamGrooooot's picture
# Model E: Unsupervised PCA + clustering risk stratification
# 53a6def
# Import libraries
import pandas as pd
# Set file paths
# Base data directory. '<YOUR_DATA_PATH>' is a placeholder to replace before running.
# time_to_admission() writes its output CSV directly into this directory.
file_path = '<YOUR_DATA_PATH>/'
# Sub-directory of the base directory from which main() reads its input CSV files.
input_file_path = file_path + 'data_for_model_e_columns/'
def read_data(file):
    """
    Load a CSV file into a pandas dataframe
    --------
    :param file: path of the CSV file to load (passed straight to pd.read_csv)
    :return: dataframe holding the file contents
    """
    return pd.read_csv(file)
def format_data(data, IDs, onboard):
    """
    Convert date columns to timezone-aware (UTC) datetimes, drop all columns other
    than the study ID and the date, keep only the study IDs listed in IDs, and join
    onboarding dates to the admissions data.

    The input dataframes are not modified; all conversions happen on copies.
    --------
    :param data: dataframe with at least 'Study_ID' and 'admitted_1' columns
    :param IDs: dataframe containing RC and SU1 study IDs (must have a 'Study_ID' column)
    :param onboard: dataframe with at least 'Study_ID' and 'OB_date' columns
    :return: new dataframe with one row per match on the IDs table (left joins), the
        columns of IDs plus 'admitted_1' and 'OB_date'; dates are missing (NaT) for
        IDs that have no matching admission / onboarding row
    """
    # Column selection followed by assign() yields new frames, so the caller's
    # 'admitted_1' and 'OB_date' columns keep their original values and dtype.
    admissions = data[['Study_ID', 'admitted_1']].assign(
        admitted_1=lambda frame: pd.to_datetime(frame['admitted_1'], utc=True)
    )
    onboarding = onboard[['Study_ID', 'OB_date']].assign(
        OB_date=lambda frame: pd.to_datetime(frame['OB_date'], utc=True)
    )

    # Left joins starting from IDs act as the filter: only listed study IDs
    # survive, and IDs without an admission are kept with a missing date.
    merged = pd.merge(IDs, admissions, on="Study_ID", how="left")
    merged = pd.merge(merged, onboarding, on="Study_ID", how="left")
    return merged
def time_to_admission(data, date_of_admission, OB_date):
    """
    Calculate days from onboarding to first admission for those who had an admission
    in the study period, and save the result as 'Days_to_first_admission.csv' in the
    directory given by the module-level file_path
    --------
    :param data: dataframe containing onboarding and admissions data; a 'days' column
        is added to it in place
    :param date_of_admission: name of the datetime column showing date of first admission
    :param OB_date: name of the datetime column showing onboarding dates
    :return: the same dataframe with the additional 'days' column showing number of
        days to first admission; rows with a missing date get a missing value
    """
    # Use the column names supplied by the caller rather than hard-coded ones,
    # so the function works for any pair of datetime columns.
    data['days'] = (data[date_of_admission] - data[OB_date]).dt.days
    data.to_csv(file_path + 'Days_to_first_admission.csv')
    # Returned as well as saved, so callers can keep working with the result.
    return data
def main():
    """
    Run the full calculation: load the admissions, onboarding and study ID files,
    combine them, then compute and save the days to first admission
    """
    # Load the three input tables from the input directory
    admissions = read_data(input_file_path + "admissions_data_up_to_31082021.csv")
    onboarding = read_data(input_file_path + "onboarding_dates.csv")
    study_ids = read_data(input_file_path + "RC_SU1_IDs.csv")

    # Restrict to the listed study IDs and attach the onboarding dates
    combined = format_data(admissions, study_ids, onboarding)

    # Work out the time to first admission per ID; the function also saves the dataframe
    time_to_admission(combined, 'admitted_1', 'OB_date')
# Run the calculation only when this file is executed as a script, so that
# importing the module does not trigger file reads and writes.
if __name__ == "__main__":
    main()