# Import libraries
from numpy import isnan  # kept for backward compatibility; NaN is handled by comparisons below
import pandas as pd

# Set file paths
file_path = '/'
input_file_path = file_path + 'data_for_model_e_columns/'


def read_data(file):
    """
    Read in data source
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe
    """
    return pd.read_csv(file)


def GOLD_grade(data):
    """
    Calculate GOLD grade for COPD classification using FEV1%
    --------
    :param data: single row (Series or mapping) containing a 'FEV1%' value
    :return: 'GOLD 1' (>= 80), 'GOLD 2' (50 to < 80), 'GOLD 3' (30 to < 50)
        or 'GOLD 4' (everything else)
    """
    fev1_percent = data['FEV1%']
    # Thresholds are tested from highest to lowest, so each branch only
    # needs its lower bound.
    if fev1_percent >= 80:
        return 'GOLD 1'
    if fev1_percent >= 50:
        return 'GOLD 2'
    if fev1_percent >= 30:
        return 'GOLD 3'
    # NOTE(review): a missing (NaN) FEV1% fails every comparison and is
    # therefore reported as 'GOLD 4' - confirm this is intended.
    return 'GOLD 4'


def GOLD_group(data):
    """
    Calculate GOLD group from admissions data, exacerbations data, and
    CAT data
    --------
    :param data: single row (Series or mapping) containing 'CAT_baseline',
        'Prior_Ad' and 'exac_prev_year' values
    :return: 'GOLD group A', 'GOLD group B', 'GOLD group C' or
        'GOLD group D'

    A row is high risk when it has at least one prior admission or more
    than one exacerbation in the previous year. High risk rows are group D
    when CAT >= 10 and group C when CAT < 10; the remaining rows are group B
    when CAT >= 10 and group A otherwise.
    """
    cat_score = data['CAT_baseline']
    # Evaluate the risk criterion on its own: '&' binds tighter than '|', so
    # writing "cat & admissions | exacerbations" would label every row with
    # more than one exacerbation as group D regardless of its CAT score.
    # Missing (NaN) values compare as False, i.e. count as "no event".
    high_risk = (data['Prior_Ad'] > 0) or (data['exac_prev_year'] > 1)

    if (cat_score >= 10) and high_risk:
        return 'GOLD group D'
    if (cat_score < 10) and high_risk:
        return 'GOLD group C'
    if cat_score >= 10:
        return 'GOLD group B'
    # NOTE(review): a missing (NaN) CAT score always ends up here, even for
    # high risk rows - confirm this is intended.
    return 'GOLD group A'


def apply_if_else(data, condition):
    """
    Apply the criteria of an if else statement to all rows
    --------
    :param data: dataframe
    :param condition: function taking one row and returning its label
    :return: result of applying condition to every row of data (one value
        per row)
    """
    return data.apply(condition, axis=1)


def main():
    # Read data
    RC_SU1_characteristics_file = (
        input_file_path + "Cohort_characteristics_data_RC_SU.csv")
    RC_SU1_characteristics_data = read_data(RC_SU1_characteristics_file)

    # Remove columns that are not required for calculating GOLD criteria.
    # Take an explicit copy so the new columns are added to an independent
    # frame instead of a slice of the source data (avoids pandas'
    # SettingWithCopyWarning).
    GOLD_data = RC_SU1_characteristics_data[
        ['ID', 'FEV1%', 'CAT_baseline', 'Prior_Ad', 'exac_prev_year']
    ].copy()

    # Create new columns showing the GOLD grade and GOLD group of each
    # study participant
    GOLD_data['GOLD grade'] = apply_if_else(GOLD_data, GOLD_grade)
    GOLD_data['GOLD group'] = apply_if_else(GOLD_data, GOLD_group)

    # Save data
    GOLD_data.to_csv(file_path + 'GOLD_data.csv')


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()