File size: 2,363 Bytes
53a6def
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Import libraries
import pandas as pd

# Directory settings: file_path is the base data directory (placeholder value
# as committed - presumably replaced with a real directory before running);
# input_file_path is the sub-directory the input CSVs are read from.
file_path = '<YOUR_DATA_PATH>/'
input_file_path = f'{file_path}data_for_model_e_columns/'


def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename, handed directly to pandas.read_csv
    :return: dataframe holding the parsed contents of the file
    """
    return pd.read_csv(file)


def format_data(data, IDs, onboard):
    """
    Convert datetime columns to datetime format, remove additional columns,
    filter to only include the IDs listed in IDs, and join onboarding dates
    to admissions data
    --------
    The 'admitted_1' and 'OB_date' columns are parsed as UTC datetimes. Both
    tables are left-joined onto IDs via 'Study_ID', so every row of IDs is
    kept and IDs without a match get a missing value in the date column.
    The input dataframes are left unmodified: conversion is done on copies of
    the selected columns.
    --------
    :param data: dataframe with at least 'Study_ID' and 'admitted_1' columns
    :param IDs: dataframe containing RC and SU1 study IDs ('Study_ID' column)
    :param onboard: dataframe containing onboarding dates ('Study_ID' and
        'OB_date' columns)
    :return: formatted dataframe with the columns of IDs plus 'admitted_1'
        and 'OB_date'
    """
    # Subset first and copy, so the caller's dataframes are never written to
    admissions = data[['Study_ID', 'admitted_1']].copy()
    admissions['admitted_1'] = pd.to_datetime(admissions['admitted_1'], utc=True)

    onboarding = onboard[['Study_ID', 'OB_date']].copy()
    onboarding['OB_date'] = pd.to_datetime(onboarding['OB_date'], utc=True)

    # Left joins keep exactly the study IDs present in IDs
    merged = pd.merge(IDs, admissions, on="Study_ID", how="left")
    merged = pd.merge(merged, onboarding, on="Study_ID", how="left")
    return merged


def time_to_admission(data, date_of_admission, OB_date, output_path=None):
    """
    Calculate days from onboarding to first admission for those who had an
    admission in the study period, and save the result as a CSV
    --------
    A 'days' column is added to data in place (whole days between the two
    datetime columns; missing where either date is missing), the dataframe is
    written to CSV, and the same dataframe is returned.
    --------
    :param data: dataframe containing onboarding and admissions data
    :param date_of_admission: name of the datetime column showing date of
        first admission
    :param OB_date: name of the datetime column showing onboarding dates
    :param output_path: path or writable buffer for the CSV output; when
        None, file_path + 'Days_to_first_admission.csv' is used
    :return: dataframe with additional column showing number of days to first
        admission for those who had an admission
    """
    # Use the column names supplied by the caller rather than fixed names
    data['days'] = (data[date_of_admission] - data[OB_date]).dt.days

    if output_path is None:
        # Default location: the module-level data directory
        output_path = file_path + 'Days_to_first_admission.csv'
    data.to_csv(output_path)
    return data


def main():
    """
    Run the full pipeline: load the three input CSVs, combine them, and
    compute and save the days from onboarding to first admission
    --------
    :return: None
    """
    # Load admissions, onboarding dates and study IDs (in that order) from
    # the input directory
    admissions_data, onboard, RC_SU1_IDs = (
        read_data(input_file_path + filename)
        for filename in (
            "admissions_data_up_to_31082021.csv",
            "onboarding_dates.csv",
            "RC_SU1_IDs.csv",
        )
    )

    # Join admissions and onboarding dates onto the selected study IDs, then
    # work out the time to first admission for each ID and save the result
    time_to_admission(
        format_data(admissions_data, RC_SU1_IDs, onboard),
        'admitted_1',
        'OB_date',
    )


# Only run the pipeline when executed as a script, so that importing this
# module (e.g. to reuse its functions) does not trigger file reads and writes
if __name__ == "__main__":
    main()