# Import libraries
from numpy import isnan
import pandas as pd
# Set file paths: root data directory (placeholder value, presumably replaced
# by the user before running) and the sub-directory holding the input files
file_path = '<YOUR_DATA_PATH>/'
input_file_path = f'{file_path}data_for_model_e_columns/'
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename, handed unchanged to pandas.read_csv
    :return: dataframe
    """
    return pd.read_csv(file)
def GOLD_grade(data):
    """
    Calculate GOLD grade for COPD classification using FEV1%
    --------
    :param data: one row of the dataframe (Series or mapping) holding an
                 'FEV1%' value; the function is applied row by row
    :return: 'GOLD 1' (FEV1% >= 80), 'GOLD 2' (50 <= FEV1% < 80),
             'GOLD 3' (30 <= FEV1% < 50) or 'GOLD 4' (FEV1% < 30);
             NaN when FEV1% is missing
    """
    fev1 = data['FEV1%']
    # Every comparison against NaN is False, so without this guard a missing
    # measurement would fall through all branches and be graded 'GOLD 4'.
    if pd.isna(fev1):
        return float('nan')
    # Thresholds are tested in descending order, so each branch only needs
    # its lower bound.
    if fev1 >= 80:
        return 'GOLD 1'
    if fev1 >= 50:
        return 'GOLD 2'
    if fev1 >= 30:
        return 'GOLD 3'
    return 'GOLD 4'
def GOLD_group(data):
    """
    Calculate GOLD group from admissions data, exacerbations data, and CAT data
    --------
    :param data: one row of the dataframe (Series or mapping) holding
                 'CAT_baseline', 'Prior_Ad' and 'exac_prev_year' values;
                 the function is applied row by row
    :return: GOLD group label, or NaN when CAT_baseline is missing

    Classification used here:
        high risk     = Prior_Ad > 0 or exac_prev_year > 1
        high symptoms = CAT_baseline >= 10

        high risk, high symptoms -> 'GOLD group D'
        high risk, low symptoms  -> 'GOLD group C'
        low risk,  high symptoms -> 'GOLD group B'
        low risk,  low symptoms  -> 'GOLD group A'
    """
    cat_score = data['CAT_baseline']
    # Without a CAT score the symptom axis is unknown; NaN comparisons are
    # all False, so the row would otherwise be silently forced into a group.
    if pd.isna(cat_score):
        return float('nan')

    # The risk test is evaluated as one unit. Written inline as
    # `cat & admitted | exacerbations`, `&` binds tighter than `|`, which made
    # the exacerbation count override the CAT score.
    # A missing Prior_Ad / exac_prev_year compares as False, i.e. it does not
    # by itself make the participant high risk.
    high_risk = (data['Prior_Ad'] > 0) or (data['exac_prev_year'] > 1)
    high_symptoms = cat_score >= 10

    if high_risk:
        return 'GOLD group D' if high_symptoms else 'GOLD group C'
    return 'GOLD group B' if high_symptoms else 'GOLD group A'
def apply_if_else(data, condition):
    """
    Evaluate an if else classification function on every row of a dataframe
    --------
    :param data: dataframe
    :param condition: function taking one row and returning that row's value
    :return: result of applying condition row by row (axis=1)
    """
    per_row_result = data.apply(condition, axis=1)
    return per_row_result
def main():
    """
    Read the cohort characteristics file, derive the GOLD grade and GOLD group
    of each study participant, and save the result as 'GOLD_data.csv'
    --------
    :return: None (the output is written to disk under file_path)
    """
    # Read data
    characteristics_file = input_file_path + "Cohort_characteristics_data_RC_SU.csv"
    characteristics_data = read_data(characteristics_file)

    # Keep only the columns required for calculating GOLD criteria.
    # .copy() gives an independent frame: adding columns to a bare column
    # selection is chained assignment and triggers SettingWithCopyWarning.
    gold_columns = ['ID', 'FEV1%', 'CAT_baseline', 'Prior_Ad', 'exac_prev_year']
    gold_data = characteristics_data[gold_columns].copy()

    # Create new columns showing the GOLD grade and GOLD group of each study participant
    gold_data['GOLD grade'] = apply_if_else(gold_data, GOLD_grade)
    gold_data['GOLD group'] = apply_if_else(gold_data, GOLD_group)

    # Save data
    gold_data.to_csv(file_path + 'GOLD_data.csv')
# Run the pipeline only when executed as a script, so the classification
# functions can be imported without triggering file I/O.
if __name__ == "__main__":
    main()