# Import libraries
import pandas as pd
import numpy as np  # noqa: F401  (not used below; import retained from the original file)

# Set file paths
file_path = '/'
input_file_path = file_path + 'data_for_model_e_columns/'


def read_data(file):
    """
    Read in data source
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe
    """
    return pd.read_csv(file)


def format_data(onboard, IDs, censor_date='2021-08-31'):
    """
    Convert the date columns to datetime format, keep only the Study_IDs
    listed in IDs, and add a date of death ('DOD') column
    --------
    :param onboard: dataframe containing 'Study_ID', 'OB_date' and 'censor'
        columns; it is not modified
    :param IDs: dataframe containing the 'Study_ID' values of interest
    :param censor_date: a 'censor' value equal to this date is not treated
        as a date of death, so 'DOD' is left as NaT for that row
    :return: new dataframe with one row per row of IDs (left merge), with
        'OB_date', 'censor' and 'DOD' as datetime columns
    """
    # Convert on a copy so the caller's dataframe is left untouched
    onboard = onboard.assign(
        OB_date=pd.to_datetime(onboard['OB_date']),
        censor=pd.to_datetime(onboard['censor']),
    )

    # Left merge keeps every ID of interest, even those with no onboarding row
    onboard = pd.merge(IDs, onboard, on="Study_ID", how="left")

    # DOD is the censor date with any time of day dropped, and NaT wherever
    # the censor value equals censor_date
    not_censored = onboard['censor'] != censor_date
    onboard['DOD'] = onboard['censor'].dt.normalize().where(not_censored)
    return onboard


def calculate_suvival(onboard, date_of_death, OB_date):
    """
    Calculate days from onboarding to date of death and save the dataframe
    --------
    :param onboard: dataframe containing onboarding and date of death data;
        a 'days' column is added to it in place
    :param date_of_death: name of the datetime column holding date of death
    :param OB_date: name of the datetime column holding onboarding date
    :return: None; the dataframe is written to
        file_path + 'Time_to_death_for_cohorts.csv'
    """
    # Rows with no date of death (NaT) get a missing value for 'days'
    onboard['days'] = (onboard[date_of_death] - onboard[OB_date]).dt.days
    onboard.to_csv(file_path + 'Time_to_death_for_cohorts.csv')


def main():
    """
    Read the onboarding dates and cohort IDs, format them, then calculate
    and save the days from onboarding to date of death
    """
    # Read in data
    onboard_file = input_file_path + "onboarding_dates.csv"
    RC_SU1_IDs_file = input_file_path + "RC_SU1_IDs.csv"
    onboard = read_data(onboard_file)
    RC_SU1_IDs = read_data(RC_SU1_IDs_file)

    # Format data
    onboard = format_data(onboard, RC_SU1_IDs)

    # Calculate days alive following onboarding and save the dataframe
    calculate_suvival(onboard, 'DOD', 'OB_date')


# Only run the pipeline when executed as a script, not when imported
if __name__ == "__main__":
    main()