| | import copd |
| | import os |
| | import pandas as pd |
| | from scipy.stats import ks_2samp, cramervonmises_2samp |
| | import seaborn as sns |
| | import matplotlib.pyplot as plt |
# Global seaborn styling applied to every figure created after this point.
sns.set(context='talk', style='darkgrid')
sns.set_palette('dark')

# Keep both palettes around as colour lists so individual plots can pick
# entries by index.
muted = sns.color_palette('muted')
dark = sns.color_palette('dark')
| |
|
# Folder holding the raw platform export; replace the placeholder locally.
data_dir = '<YOUR_DATA_PATH>/lenus-samples-dataset'

# Sample table: read only the columns used further down.
lenus_sample = pd.read_csv(
    os.path.join(data_dir, "DataServerDatasetSample.txt"),
    sep="|",
    usecols=[
        'StartDate', 'EndDate',
        'CreatorSubject', 'QuantityId',
        'TypeIdentifier', 'CreationDate',
    ],
)

# Parse the timestamp columns as UTC and truncate each one to midnight, so
# later joins compare calendar days rather than exact instants.
date_cols = ['StartDate', 'EndDate', 'CreationDate']
for col in date_cols:
    lenus_sample[col] = pd.to_datetime(lenus_sample[col], utc=True).dt.normalize()

# Quantity table, joined to the samples through QuantityId -> Id below.
lenus_quantity = pd.read_csv(
    os.path.join(data_dir, "DataServerDatasetQuantity.txt"),
    sep="|",
)

# Attach the quantity record to every sample; the quantity's own Id column
# duplicates QuantityId after the join, so it is dropped.
platform_data = lenus_sample.merge(
    lenus_quantity, left_on='QuantityId', right_on='Id'
)
platform_data = platform_data.drop(columns=['Id'])

# Translate the raw Unit values via the project helper (copd.unit_lookup is
# defined outside this file - presumably maps unit codes to names; verify).
platform_data['Units'] = copd.unit_lookup(platform_data['Unit'])

# Join each TypeIdentifier against the row index (position) of the lookup
# file to bring in the lookup's columns.
type_lookup = pd.read_csv('./lookups/type_lookup.txt')
platform_data = platform_data.merge(
    type_lookup, left_on='TypeIdentifier', right_on=type_lookup.index
)

# The raw identifier columns are no longer needed once the lookups are joined.
platform_data = platform_data.drop(columns=['TypeIdentifier', 'Unit'])

# Reshape to wide form: one row per (StartDate, EndDate, CreationDate,
# CreatorSubject) and one column per Description. pivot_table's default
# aggregation (mean) combines duplicate entries.
platform_data = platform_data.pivot_table(
    values='Value',
    index=['StartDate', 'EndDate', 'CreationDate', 'CreatorSubject'],
    columns=['Description'],
).reset_index()
| |
|
# Pre-built exacerbation dataset.
# NOTE(review): unpickling can execute arbitrary code embedded in the file -
# only load pickles from a trusted source.
data = pd.read_pickle(os.path.join('<YOUR_DATA_PATH>/copd-dataset', 'exac_data.pkl'))

# Distinct LenusId values; used below to restrict platform rows to these ids.
patients = data['LenusId'].unique()
| |
|
| |
|
def filter_on_date_and_id(df, min_date, patients):
    """Return the rows of ``df`` that are recent enough and belong to ``patients``.

    Args:
        df: DataFrame with ``CreationDate`` and ``CreatorSubject`` columns.
        min_date: Lower bound (inclusive) compared against ``CreationDate``.
        patients: Collection of subject ids to keep.

    Returns:
        The subset of ``df`` whose ``CreationDate`` is ``>= min_date`` and
        whose ``CreatorSubject`` is contained in ``patients``; the original
        index labels are preserved.
    """
    recent_enough = df['CreationDate'] >= min_date
    known_patient = df['CreatorSubject'].isin(patients)
    return df.loc[recent_enough & known_patient]
| |
|
| |
|
def resample_and_merge_median(df, fitbit):
    """Join ``df`` with the per-subject daily median of ``fitbit``.

    The readings are reduced to one row per ``CreatorSubject`` and calendar
    day (median of every numeric column) and then inner-joined to ``df`` on
    subject and day.

    The aggregation is a plain group-by on (subject, day) rather than
    ``groupby(...).resample(...)``: the latter also operates on the grouping
    column, which makes ``median()`` fail on non-numeric ids and makes
    ``reset_index()`` collide with a numeric id column, depending on the
    pandas version.

    Args:
        df: DataFrame with ``LenusId`` and ``DateOfEvent`` columns.
        fitbit: DataFrame with a datetime ``CreationDate`` column, a
            ``CreatorSubject`` column and one or more measurement columns.
            It is not modified.

    Returns:
        Inner merge of ``df`` with the daily medians, matched on
        ``LenusId == CreatorSubject`` and ``DateOfEvent == CreationDate``
        (the day, at midnight). Days whose median is NaN are excluded.
    """
    # Truncate to midnight so all readings from one day share a group key;
    # this is the same day label a '1d' resample would produce.
    per_day = fitbit.assign(CreationDate=fitbit['CreationDate'].dt.normalize())
    daily_median = (
        per_day.groupby(['CreatorSubject', 'CreationDate'])
        .median(numeric_only=True)
        .dropna()
        .reset_index()
    )
    return df.merge(
        daily_median,
        left_on=['LenusId', 'DateOfEvent'],
        right_on=['CreatorSubject', 'CreationDate'],
        how='inner',
    )
| |
|
| |
|
def resample_and_merge_last(df, fitbit):
    """Join ``df`` with the last reading per subject and day from ``fitbit``.

    For every ``CreatorSubject`` and calendar day the last non-null value of
    each column is kept, and the result is inner-joined to ``df`` on subject
    and ``DateOfEvent``.

    Unlike the earlier implementation this does not add a ``DateOfEvent``
    column to the caller's ``fitbit`` frame, and it does not depend on
    ``groupby(...).resample(...)`` leaking the grouping column into the
    result (a pandas behaviour that is deprecated and version-dependent).

    Args:
        df: DataFrame with ``LenusId`` and ``DateOfEvent`` columns.
        fitbit: DataFrame with a datetime ``CreationDate`` column, a
            ``CreatorSubject`` column and one or more measurement columns.
            It is not modified.

    Returns:
        Inner merge of ``df`` with the daily last readings, matched on
        ``LenusId == CreatorSubject`` and on ``DateOfEvent``. On the
        ``fitbit`` side ``DateOfEvent`` is the last ``CreationDate`` value
        seen that day, so rows only match when that timestamp equals
        ``df.DateOfEvent`` exactly. The day-bucket column itself is not
        included in the output.
    """
    # Work on a derived frame: DateOfEvent keeps the raw timestamp that will
    # be used as merge key, CreationDate becomes the day bucket.
    per_day = fitbit.assign(
        DateOfEvent=fitbit['CreationDate'],
        CreationDate=fitbit['CreationDate'].dt.normalize(),
    )
    daily_last = (
        per_day.groupby(['CreatorSubject', 'CreationDate'])
        .last()
        .dropna()
        .reset_index()
        # The bucket label was only needed for grouping.
        .drop(columns='CreationDate')
    )
    return df.merge(
        daily_last,
        left_on=['LenusId', 'DateOfEvent'],
        right_on=['CreatorSubject', 'DateOfEvent'],
        how='inner',
    )
| |
|
| |
|
def print_numbers(df, measurement):
    """Print two summary lines describing the coverage of ``df``.

    The first line reports the number of rows, the number of distinct
    ``PatientId`` values, and how many distinct ``StudyId`` values start
    with ``'RC'`` and with ``'SU'``. The second line reports the same
    breakdown restricted to rows where ``IsExac == 1``, preceded by the sum
    of ``IsExac``.

    Args:
        df: DataFrame with ``StudyId`` (string), ``PatientId`` and
            ``IsExac`` columns.
        measurement: Label inserted into the first line (e.g. ``'HR'``).

    Returns:
        None. Output goes to stdout.
    """

    def _breakdown(frame):
        # (distinct patients, distinct RC study ids, distinct SU study ids)
        study_ids = pd.Series(frame.StudyId.unique())
        return (
            len(frame.PatientId.unique()),
            study_ids.str.startswith('RC').sum(),
            study_ids.str.startswith('SU').sum(),
        )

    n_patients, n_rc, n_su = _breakdown(df)
    print(
        f'{len(df)} patient days with {measurement} data across '
        f'{n_patients} unique patients ({n_rc} RC and {n_su} SU)'
    )

    exac_rows = df[df.IsExac == 1]
    n_exac_patients, n_exac_rc, n_exac_su = _breakdown(exac_rows)
    print(
        f'{df.IsExac.sum()} exacerbations across {n_exac_patients} patients '
        f'({n_exac_rc} RC and {n_exac_su} SU)'
    )
| |
|
| |
|
| | |
# --- Heart rate -------------------------------------------------------------
# Rows of the platform table that carry a heart-rate value, reduced to the
# three columns needed for the join.
heart_rate = platform_data.loc[
    platform_data['heart rate'].notna(),
    ['CreationDate', 'CreatorSubject', 'heart rate'],
]

# Keep readings created from 2010-01-01 onwards by ids present in `patients`.
heart_rate = filter_on_date_and_id(heart_rate, min_date='2010-01-01', patients=patients)
heart_rate.columns  # bare expression: only displays something in an interactive session

# One heart-rate value per subject and day (the last one), joined to `data`.
hr_data = resample_and_merge_last(df=data, fitbit=heart_rate)
print_numbers(hr_data, 'HR')

# --- Steps ------------------------------------------------------------------
# Same procedure for the step counts, except that the daily value is the
# median rather than the last reading.
steps = platform_data.loc[
    platform_data['number of steps taken;'].notna(),
    ['CreationDate', 'CreatorSubject', 'number of steps taken;'],
]
steps = filter_on_date_and_id(steps, min_date='2010-01-01', patients=patients)
steps_data = resample_and_merge_median(df=data, fitbit=steps)

print_numbers(steps_data, 'steps')
| |
|
# Compare the distribution of each measurement on exacerbation days
# (IsExac == 1) with non-exacerbation days (IsExac == 0), using only
# patients that have at least one exacerbation day with that measurement.
#
# The test results are printed: as bare expressions they were computed and
# then discarded, so running the file as a script showed nothing.

# --- Heart rate -------------------------------------------------------------
hr_exac_patients = hr_data[hr_data.IsExac == 1]['PatientId'].unique()
hr_data = hr_data[hr_data.PatientId.isin(hr_exac_patients)]

hr_exac = hr_data[hr_data.IsExac == 1]['heart rate']
hr_no_exac = hr_data[hr_data.IsExac == 0]['heart rate']

print('heart rate, Kolmogorov-Smirnov 2-sample:', ks_2samp(hr_exac, hr_no_exac))
print('heart rate, Cramer-von Mises 2-sample:',
      cramervonmises_2samp(hr_exac, hr_no_exac))

# --- Steps ------------------------------------------------------------------
steps_exac_patients = steps_data[steps_data.IsExac == 1]['PatientId'].unique()
steps_data = steps_data[steps_data.PatientId.isin(steps_exac_patients)]

steps_exac = steps_data[steps_data.IsExac == 1]['number of steps taken;']
steps_no_exac = steps_data[steps_data.IsExac == 0]['number of steps taken;']

print('steps, Kolmogorov-Smirnov 2-sample:', ks_2samp(steps_exac, steps_no_exac))
print('steps, Cramer-von Mises 2-sample:',
      cramervonmises_2samp(steps_exac, steps_no_exac))
| |
|
# Side-by-side density histograms of heart rate: non-exacerbation days on the
# left panel ('a'), exacerbation days on the right panel ('b'). Both panels
# share their axes so the two distributions can be compared directly.
fig, axes = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True,
                         constrained_layout=True, figsize=(8, 6))

sns.histplot(hr_data[hr_data.IsExac == 0], x="heart rate", binwidth=5, binrange=[50, 100],
             alpha=.6, stat="density", legend=True, ax=axes[0], color=dark[0])
axes[0].set_xlabel(None)
# Attach the legend to this panel explicitly. plt.legend() acts on the
# *current* axes, which after plt.subplots() is the last panel created, so
# the left panel previously never received its legend.
axes[0].legend(['a'])

sns.histplot(hr_data[hr_data.IsExac == 1], x="heart rate", binwidth=5, binrange=[50, 100],
             alpha=.6, stat="density", legend=True, ax=axes[1], color=dark[1])
axes[1].set_xlabel(None)
axes[1].legend(['b'])

# Single x-axis label for the whole figure instead of one per panel.
fig.supxlabel('heart rate')
| |
|