# IamGrooooot's picture
# Model E: Unsupervised PCA + clustering risk stratification
# 53a6def
import pandas as pd
from utils.common import read_data
# BNF item codes grouped under the names "steroid" and "antib". Each code is
# listed exactly once: repeated entries would only add redundant alternatives
# to the pattern built from exac_meds.
steroid_codes = [
    '0603020T0AAACAC', '0603020T0AABKBK', '0603020T0AAAXAX',
    '0603020T0AAAGAG', '0603020T0AABHBH', '0603020T0AABNBN',
]
antib_codes = [
    '0501013B0AAAAAA', '0501013B0AAABAB', '0501030I0AAABAB',
    '0501030I0AAAAAA', '0501050B0AAAAAA', '0501050B0AAADAD',
    '0501013K0AAAJAJ',
]
# Combined list that track_medication matches against PI_BNF_Item_Code to
# flag rescue medication prescriptions.
exac_meds = steroid_codes + antib_codes
def initialize_presc_data(presc_file):
    """
    Read the prescribing extract and tidy it for later processing
    --------
    :param presc_file: prescribing data file name
    :return: prescribing dataframe with null rows and duplicate rows removed
        and PRESC_DATE converted to datetime
    """
    print('Loading prescribing data')

    # Each column to load, paired with the type requested from read_data
    column_types = {
        'SafeHavenID': 'int',
        'PRESC_DATE': 'object',
        'PI_Approved_Name': 'str',
        'PI_BNF_Item_Code': 'str',
    }
    names = list(column_types)
    dtypes = list(column_types.values())
    raw = read_data(presc_file, names, dtypes)

    # Discard rows containing nulls first, then exact repeats
    cleaned = raw.dropna().drop_duplicates()

    # PRESC_DATE is requested as a plain object; parse it into datetimes
    cleaned['PRESC_DATE'] = pd.to_datetime(cleaned['PRESC_DATE'])
    return cleaned
def track_medication(df, rescue_codes=None):
    """
    Track salbutamol, rescue med and anxiety/depression prescriptions
    https://openprescribing.net/bnf/
    --------
    :param df: dataframe with PI_BNF_Item_Code and PI_Approved_Name columns;
        the new columns are added to this dataframe in place
    :param rescue_codes: optional iterable of BNF item codes that count as
        rescue meds; when omitted the module-level exac_meds list is used
    :return: the same dataframe with the columns code, SALBUTAMOL,
        rescue_meds and anxiety_depression_presc added
    """
    import re

    print('Tracking medication')
    item_codes = df.PI_BNF_Item_Code

    # Extract BNF codes without brand info (first 9 characters). The .str
    # accessor keeps a missing code missing instead of raising on it.
    df['code'] = item_codes.str[:9]

    # Add flag for salbutamol - marked important by Chris
    df['SALBUTAMOL'] = (df.code == '0301011R0').astype(int)

    # Track rescue meds. Codes are escaped so they are matched literally, and
    # missing item codes count as "not a rescue med" (na=False) rather than
    # breaking the integer conversion.
    codes = exac_meds if rescue_codes is None else list(rescue_codes)
    if codes:
        pattern = '|'.join(re.escape(code) for code in codes)
        df['rescue_meds'] = item_codes.str.contains(
            pattern, na=False).astype(int)
    else:
        # An empty pattern would match every row, so flag nothing instead
        df['rescue_meds'] = 0

    # Track anxiety and depression medication by BNF code prefix
    ad_bnf = ('040102', '0403', '0204000R0', '0408010AE')
    ad_events = item_codes.str.startswith(ad_bnf, na=False)

    # Rows whose approved name is one of the DUMMY markers are never flagged
    not_dummy = ~df.PI_Approved_Name.isin(['DUMMY', 'DUMMY REJECTED'])
    df['anxiety_depression_presc'] = (not_dummy & ad_events).astype(int)
    return df