| | """ |
| | Map GOLD standard COPD groupings from REC/SUP IDs to SafeHavenIDs. |
| | -------- |
| | NB: The data contained within 'RC_SU1_spirometry_data.csv' has been created |
| | from data within the teams space. |
| | """ |
| | import pandas as pd |
| |
|
| |
|
| | |
# Base path that the input data folder below is built from.
file_path = '<YOUR_DATA_PATH>/copd.model-e/'
# Folder holding the input CSV read by main().
input_file_path = f'{file_path}training/src/data/'
# Pickle file that main() writes the mapped spirometry data to.
output_file_path = '<YOUR_DATA_PATH>/Model_E_Extracts/rec_sup_spirometry_data.pkl'
| |
|
| |
|
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename of the CSV to load (passed straight to
        pandas.read_csv with default options)
    :return: dataframe holding the parsed file contents
    """
    return pd.read_csv(file)
| |
|
| |
|
def calc_gold_grade(data):
    """
    Derive the GOLD grade for COPD classification from a single FEV1% value
    --------
    :param data: row-like object (e.g. a dataframe row) with a scalar
        'FEV1%' entry
    :return: 'GOLD 1' (>= 80), 'GOLD 2' (50 to < 80), 'GOLD 3' (30 to < 50),
        'GOLD 4' (< 30), or '' when the value matches none of the bands
        (e.g. NaN, for which every comparison is False)
    """
    percent_predicted = data['FEV1%']

    if percent_predicted >= 80:
        return 'GOLD 1'
    if 50 <= percent_predicted < 80:
        return 'GOLD 2'
    if 30 <= percent_predicted < 50:
        return 'GOLD 3'
    if percent_predicted < 30:
        return 'GOLD 4'

    # Reached only when no comparison held (e.g. a NaN reading)
    return ''
| |
|
| |
|
def add_SH_mappings_for_RC_and_SU1(RC_IDs, SU1_IDs, spirometry_data):
    """
    Attach the SH ID mappings to the RC and SU1 spirometry data
    --------
    :param RC_IDs: dataframe containing RECEIVER - SH ID mappings, keyed by
        an 'RNo' column
    :param SU1_IDs: dataframe containing SU1 - SH ID mappings, keyed by a
        'Study_Number' column
    :param spirometry_data: spirometry data for RC and SU1 with 'StudyId'
        and 'FEV1%' columns
    :return: spirometry rows left-joined to the combined mappings on
        'StudyId', with every row containing any missing value removed and
        'FEV1%' / 'SafeHavenID' cast to int32
    """
    # Bring both lookups onto the shared 'StudyId' key before stacking them
    id_frames = [
        RC_IDs.rename(columns={'RNo': 'StudyId'}),
        SU1_IDs.rename(columns={'Study_Number': 'StudyId'}),
    ]
    combined_lookup = pd.concat(id_frames, ignore_index=True)

    # Left join keeps every spirometry row; rows with any NaN (including
    # those without a matching ID) are then removed by dropna()
    joined = spirometry_data.merge(combined_lookup, on="StudyId", how="left")
    complete_rows = joined.dropna()

    return complete_rows.astype({'FEV1%': 'int32', 'SafeHavenID': 'int32'})
| |
|
| |
|
def main():
    """
    Build the REC/SUP spirometry extract: load the spirometry CSV, add a
    GOLD grade column, join the SafeHavenID mappings and pickle the result
    to output_file_path
    """
    # Load the spirometry readings, discarding rows with any missing value
    spirometry_csv = input_file_path + "RC_SU1_spirometry_data.csv"
    spirometry = read_data(spirometry_csv).dropna()

    # Grade each row from its FEV1% value
    gold_grades = spirometry.apply(calc_gold_grade, axis=1)
    spirometry['GOLD grade'] = gold_grades

    # Load the RECEIVER and scale-up ID lookup tables
    receiver_lookup = read_data(
        "<YOUR_DATA_PATH>/EXAMPLE_STUDY_DATA/Cohort3Rand.csv")
    scaleup_lookup = read_data(
        "<YOUR_DATA_PATH>/SU_IDs/Scale_Up_lookup.csv")

    # Join the SafeHavenID mappings onto the graded spirometry data
    spirometry_with_ids = add_SH_mappings_for_RC_and_SU1(
        receiver_lookup, scaleup_lookup, spirometry)

    # Persist the mapped data
    spirometry_with_ids.to_pickle(output_file_path)
| |
|
| |
|
# Run the extract pipeline.
# NOTE(review): this call is unconditional, so it also runs when the module
# is imported; consider an `if __name__ == "__main__":` guard if that is
# not intended.
main()
| |
|