"""Build the SafeHavenID -> StudyId lookup and sanity-check it.

Steps:
1. Load the two ID lookup files and give both a common ``StudyId`` column.
2. Concatenate them, drop incomplete rows, and pickle the result.
3. Join the lookup onto both demographics extracts and report patients whose
   sex or date of birth disagree between the two sources, plus any
   SafeHavenID that appears more than once in the lookup.
"""
import os

import pandas as pd

data_dir = '<YOUR_DATA_PATH>/'

# First lookup: its ID column is called 'RNo'; rename it so both lookups
# share the 'StudyId' column name.
receiver = pd.read_csv(os.path.join(data_dir, 'Cohort3Rand.csv'))
receiver = receiver.rename(columns={'RNo': 'StudyId'})

# Second lookup: same treatment for its 'Study_Number' column.
scaleup = pd.read_csv(os.path.join(data_dir, 'SU_IDs', 'Scale_Up_lookup.csv'))
scaleup = scaleup.rename(columns={'Study_Number': 'StudyId'})

# Stack both lookups and discard rows with any missing value.
# NOTE(review): dropna() considers every column; if the two files carry
# different extra columns, concat fills the gaps with NaN and those rows are
# dropped here too -- confirm both files share the same columns.
all_patients = pd.concat([receiver, scaleup]).dropna()

# Persist the combined lookup for downstream use.
all_patients.to_pickle(os.path.join(data_dir, 'sh_to_studyid_mapping.pkl'))

# Demographics from the two sources, restricted to the columns compared below.
lenus_demographics = pd.read_csv(
    os.path.join(data_dir, 'copd-dataset', 'CopdDatasetPatientDetails.txt'),
    usecols=['StudyId', 'DateOfBirth', 'Sex'],
    sep='|',
)
sh_demographics = pd.read_csv(
    os.path.join(data_dir, 'EXAMPLE_STUDY_DATA', 'Demographics_Cohort4.csv'),
    usecols=['SafeHavenID', 'SEX', 'OBF_DOB'],
)

# Parse BOTH date-of-birth columns to timezone-aware, midnight-normalised
# timestamps. Previously only OBF_DOB was parsed, so the comparison below was
# Timestamp-vs-string and reported every row as a mismatch.
sh_demographics['OBF_DOB'] = pd.to_datetime(
    sh_demographics['OBF_DOB'], utc=True).dt.normalize()
lenus_demographics['DateOfBirth'] = pd.to_datetime(
    lenus_demographics['DateOfBirth'], utc=True).dt.normalize()

# Inner joins: keep only patients present in the lookup and in both
# demographics extracts.
mapping = all_patients.merge(sh_demographics, on='SafeHavenID', how='inner')
mapping = mapping.merge(lenus_demographics, on='StudyId', how='inner')

# Validation checks. These were bare expressions whose results were thrown
# away when run as a script; bind them to names and report them instead.
# NOTE(review): SEX and Sex are compared verbatim -- assumes both sources use
# the same coding; confirm. Rows missing a value on either side also show up
# as mismatches because NaN/NaT never compares equal.
sex_mismatches = mapping[mapping.SEX != mapping.Sex]
duplicate_safehaven_ids = all_patients[
    all_patients.duplicated(subset='SafeHavenID')]
dob_mismatches = mapping[mapping.OBF_DOB != mapping.DateOfBirth]

for label, frame in (
    ('Sex mismatches', sex_mismatches),
    ('Duplicated SafeHavenID rows', duplicate_safehaven_ids),
    ('Date-of-birth mismatches', dob_mismatches),
):
    print(f'{label}: {len(frame)} row(s)')
    if not frame.empty:
        print(frame)