| | |
| | import pandas as pd |
| | import numpy as np |
| |
|
| | |
# Root data folder. Replace the placeholder with a real directory and keep the
# trailing slash: every other path is built from it by plain string concatenation.
file_path = '<YOUR_DATA_PATH>/'
# Folder (under the root) holding the input CSV files.
input_file_path = ''.join([file_path, 'data_for_model_e_columns/'])
| |
|
| |
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe with the parsed contents
    """
    return pd.read_csv(file)
| |
|
| |
|
def format_data(onboard, IDs, censor_date='2021-08-31'):
    """
    Convert date columns to datetime format, keep only the IDs of interest,
    and add a Date of death ('DOD') column
    --------
    :param onboard: dataframe containing 'Study_ID', 'OB_date' and 'censor' columns
                    (left unmodified; a converted copy is used instead)
    :param IDs: dataframe containing the IDs of interest in a 'Study_ID' column
    :param censor_date: censoring date; rows whose 'censor' value equals it are
                        treated as having no date of death
    :return: formatted dataframe, one row per row of IDs (left merge), with a
             datetime 'DOD' column that is NaT where 'censor' equals censor_date
             or where the ID has no onboarding record
    """
    # assign() builds a new frame, so the caller's dataframe is not altered.
    converted = onboard.assign(
        OB_date=pd.to_datetime(onboard['OB_date']),
        censor=pd.to_datetime(onboard['censor']),
    )
    # Left merge onto IDs: keeps exactly the IDs of interest, in their order.
    merged = pd.merge(IDs, converted, on="Study_ID", how="left")

    # A censor date other than the censoring date is taken as the date of death.
    has_dod = merged['censor'] != pd.Timestamp(censor_date)
    # normalize() drops any time-of-day part; where() leaves NaT for censored rows.
    merged['DOD'] = merged['censor'].dt.normalize().where(has_dod)
    return merged
| |
|
| |
|
def calculate_suvival(onboard, date_of_death, OB_date, output_file=None):
    """
    Calculate days from onboarding to date of death and save the dataframe as CSV
    --------
    :param onboard: dataframe containing onboarding and date of death data;
                    a 'days' column is added to it in place
    :param date_of_death: name of the datetime column showing date of death
    :param OB_date: name of the datetime column showing onboarding date
    :param output_file: path or writable buffer for the CSV output; defaults to
                        'Time_to_death_for_cohorts.csv' under the module-level file_path
    """
    if output_file is None:
        # Resolved at call time so the default keeps following the global file_path.
        output_file = file_path + 'Time_to_death_for_cohorts.csv'

    # Rows with a missing date on either side give NaT here and a missing 'days' value.
    elapsed = onboard[date_of_death] - onboard[OB_date]
    onboard['days'] = elapsed.dt.days
    onboard.to_csv(output_file)
| |
|
| |
|
def main():
    """
    Run the whole pipeline: read the onboarding dates and the IDs of interest,
    format them, then compute and save the time from onboarding to death
    """
    # Read both inputs from the input folder, onboarding dates first.
    onboarding_df, cohort_ids = (
        read_data(input_file_path + csv_name)
        for csv_name in ("onboarding_dates.csv", "RC_SU1_IDs.csv")
    )

    # Restrict to the IDs of interest and derive the 'DOD' column.
    formatted = format_data(onboarding_df, cohort_ids)

    # Add the 'days' column and write the result to disk.
    calculate_suvival(formatted, 'DOD', 'OB_date')


# Runs the pipeline whenever this module is executed (or imported).
main()