File size: 2,101 Bytes
# Import libraries
import pandas as pd
import numpy as np
# Root folder for the study data; replace the placeholder with a real path (keep the trailing slash)
file_path = '<YOUR_DATA_PATH>/'
# Sub-folder that holds the input CSV files read by main()
input_file_path = f'{file_path}data_for_model_e_columns/'
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename (passed straight to pd.read_csv)
    :return: dataframe with the contents of the file
    """
    return pd.read_csv(file)
def format_data(onboard, IDs, censor_date='2021-08-31'):
    """
    Convert datetime columns to datetime format, filter to only include the IDs of interest
    (RECEIVER and scale up), and add a date of death column
    --------
    :param onboard: dataframe containing onboarding dates; must have 'Study_ID', 'OB_date'
        and 'censor' columns. The caller's dataframe is not modified
    :param IDs: dataframe containing IDs of interest in a 'Study_ID' column
    :param censor_date: censor value that marks a record as having no date of death;
        anything accepted by pd.Timestamp (defaults to the previously hard-coded date)
    :return: formatted dataframe keeping every row of IDs (left merge), with a datetime
        'DOD' column that is NaT wherever 'censor' equals censor_date or is missing
    """
    # Build the converted columns on a new frame so the caller's dataframe is left untouched
    onboard = onboard.assign(
        OB_date=pd.to_datetime(onboard['OB_date']),
        censor=pd.to_datetime(onboard['censor']),
    )
    # Left merge keeps every row of IDs; IDs without an onboarding record get NaT dates
    onboard = pd.merge(IDs, onboard, on="Study_ID", how="left")
    # Any censor value other than censor_date is taken as the date of death. The column stays
    # in datetime dtype throughout; normalize() drops the time of day, as .dt.date did before
    censor_cutoff = pd.Timestamp(censor_date)
    is_death = onboard['censor'] != censor_cutoff
    onboard['DOD'] = onboard['censor'].dt.normalize().where(is_death)
    return onboard
def calculate_suvival(onboard, date_of_death, OB_date):
    """
    Work out the number of days between onboarding and date of death, store it in a
    'days' column of the dataframe, and write the dataframe to
    'Time_to_death_for_cohorts.csv' under file_path
    --------
    :param onboard: dataframe containing onboarding and date of death data (a 'days'
        column is added to it in place)
    :param date_of_death: name of the datetime column showing date of death
    :param OB_date: name of the datetime column showing onboarding date
    """
    time_to_death = onboard[date_of_death] - onboard[OB_date]
    onboard['days'] = time_to_death.dt.days
    output_file = file_path + 'Time_to_death_for_cohorts.csv'
    onboard.to_csv(output_file)
def main():
    """
    Run the full pipeline: read the onboarding dates and the RC/SU1 IDs from
    input_file_path, format them, then calculate days from onboarding to death and
    save the result
    """
    # Read in data
    onboard_file = input_file_path + "onboarding_dates.csv"
    RC_SU1_IDs_file = input_file_path + "RC_SU1_IDs.csv"
    onboard = read_data(onboard_file)
    RC_SU1_IDs = read_data(RC_SU1_IDs_file)
    # Format data
    onboard = format_data(onboard, RC_SU1_IDs)
    # Calculate days alive following onboarding and save the dataframe
    calculate_suvival(onboard, 'DOD', 'OB_date')


# Only run the pipeline when executed as a script, so importing this module has no side effects
if __name__ == "__main__":
    main()