File size: 2,524 Bytes

53a6def

# Import libraries
from numpy import isnan
import pandas as pd

# Base directory for the data, and the sub-directory holding the model inputs
file_path = '<YOUR_DATA_PATH>/'
input_file_path = f'{file_path}data_for_model_e_columns/'


def read_data(file):
    """
    Load a csv file into a pandas dataframe
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe holding the parsed csv contents
    """
    return pd.read_csv(file)


def GOLD_grade(data):
    """
    Calculate GOLD grade for COPD classification using FEV1%
    --------
    :param data: a single row (Series or dict-like) with a 'FEV1%' value
    :return: 'GOLD 1' (FEV1% >= 80), 'GOLD 2' (50 <= FEV1% < 80),
        'GOLD 3' (30 <= FEV1% < 50), 'GOLD 4' (FEV1% < 30),
        or None when FEV1% is missing
    """
    fev1_pct = data['FEV1%']

    # A missing value fails every comparison; without this guard it would
    # fall through to the last branch and be reported as 'GOLD 4'.
    if pd.isna(fev1_pct):
        return None

    # Thresholds are checked from highest to lowest, so each test only needs
    # a lower bound.
    if fev1_pct >= 80:
        return 'GOLD 1'
    if fev1_pct >= 50:
        return 'GOLD 2'
    if fev1_pct >= 30:
        return 'GOLD 3'
    return 'GOLD 4'


def GOLD_group(data):
    """
    Calculate GOLD group from admissions data, exacerbations data, and CAT data
    --------
    :param data: a single row (Series or dict-like) with 'CAT_baseline',
        'Prior_Ad' and 'exac_prev_year' values
    :return: 'GOLD group A' .. 'GOLD group D', or None when CAT_baseline is
        missing

    A row is high risk when Prior_Ad > 0 or exac_prev_year > 1.
    High risk with CAT_baseline >= 10 is group D, high risk with
    CAT_baseline < 10 is group C, low risk with CAT_baseline >= 10 is
    group B, and everything else is group A.
    """
    cat_score = data['CAT_baseline']

    # Without a CAT score the symptom axis is unknown, so no group is assigned
    # rather than silently defaulting to one.
    if pd.isna(cat_score):
        return None

    # The risk test is evaluated as one unit. Previously it was written as
    # `cat & admissions | exacerbations`, which Python parses as
    # `(cat & admissions) | exacerbations`, so every row with more than one
    # exacerbation became group D regardless of its CAT score.
    # Missing Prior_Ad / exac_prev_year compare as False, i.e. count as no
    # event, matching how missing exac_prev_year was already treated.
    high_risk = bool((data['Prior_Ad'] > 0) or (data['exac_prev_year'] > 1))
    high_symptoms = bool(cat_score >= 10)

    if high_risk:
        return 'GOLD group D' if high_symptoms else 'GOLD group C'
    return 'GOLD group B' if high_symptoms else 'GOLD group A'
    

def apply_if_else(data, condition):
    """
    Evaluate a row-level function on every row of a dataframe
    --------
    :param data: dataframe
    :param condition: function taking one row and returning a value
    :return: result of calling condition on each row (axis=1)
    """
    per_row_result = data.apply(condition, axis=1)
    return per_row_result


def main():
    """
    Read the cohort characteristics, derive GOLD grade and GOLD group for
    each participant, and save the result as a csv
    --------
    :return: None; writes 'GOLD_data.csv' under file_path
    """
    # Read data
    RC_SU1_characteristics_file = input_file_path + "Cohort_characteristics_data_RC_SU.csv"
    RC_SU1_characteristics_data = read_data(RC_SU1_characteristics_file)

    # Keep only the columns required for calculating GOLD criteria.
    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of the source data (SettingWithCopyWarning).
    GOLD_columns = ['ID', 'FEV1%', 'CAT_baseline', 'Prior_Ad', 'exac_prev_year']
    GOLD_data = RC_SU1_characteristics_data[GOLD_columns].copy()

    # Create new columns showing the GOLD group and GOLD stage of each study participant
    GOLD_data['GOLD grade'] = apply_if_else(GOLD_data, GOLD_grade)
    GOLD_data['GOLD group'] = apply_if_else(GOLD_data, GOLD_group)

    # Save data
    GOLD_data.to_csv(file_path + 'GOLD_data.csv')
    

# Run the pipeline only when this file is executed as a script, so that
# importing it (e.g. to reuse GOLD_grade / GOLD_group) does not trigger file I/O.
if __name__ == "__main__":
    main()