| import subprocess |
| import pandas as pd |
| import numpy as np |
| import ast |
| import os |
| import tqdm |
| import shutil |
| import wfdb |
|
|
|
|
| |
| |
| |
def aggregate_diagnostic_one_class(y_dic, statements=None):
    """Return the diagnostic class of the highest-valued diagnostic code.

    Args:
        y_dic: Mapping from SCP code to a numeric value (one parsed
            ``scp_codes`` entry).
        statements: Optional DataFrame indexed by SCP code that has a
            ``diagnostic_class`` column. When omitted, the module-level
            ``agg_df`` table is used, so the one-argument form used with
            ``Series.apply`` keeps working.

    Returns:
        The ``diagnostic_class`` of the code with the largest value among the
        codes found in the table's index, or the string ``"None"`` when no
        code of ``y_dic`` is in the table. On ties the first code seen wins.
    """
    table = agg_df if statements is None else statements
    best_key = None
    best_value = -float("inf")
    for key, value in y_dic.items():
        # Only codes listed in the table are candidates; strict ">" keeps
        # the first code on ties.
        if key in table.index and value > best_value:
            best_key, best_value = key, value
    if best_key is None:
        return "None"
    # Look the class up once, for the winning code only.
    return table.loc[best_key].diagnostic_class
|
|
def aggregate_diagnostic_one_subclass(y_dic, statements=None):
    """Return the diagnostic subclass of the highest-valued diagnostic code.

    Args:
        y_dic: Mapping from SCP code to a numeric value (one parsed
            ``scp_codes`` entry).
        statements: Optional DataFrame indexed by SCP code that has a
            ``diagnostic_subclass`` column. When omitted, the module-level
            ``agg_df`` table is used, so the one-argument form used with
            ``Series.apply`` keeps working.

    Returns:
        The ``diagnostic_subclass`` of the code with the largest value among
        the codes found in the table's index, or the string ``"None"`` when
        no code of ``y_dic`` is in the table. On ties the first code seen
        wins.
    """
    table = agg_df if statements is None else statements
    best_key = None
    best_value = -float("inf")
    for key, value in y_dic.items():
        # Only codes listed in the table are candidates; strict ">" keeps
        # the first code on ties.
        if key in table.index and value > best_value:
            best_key, best_value = key, value
    if best_key is None:
        return "None"
    # Look the subclass up once, for the winning code only.
    return table.loc[best_key].diagnostic_subclass
|
|
def aggregate_diagnostic_all_classes(y_dic, statements=None):
    """Return the highest-valued diagnostic SCP code itself.

    Args:
        y_dic: Mapping from SCP code to a numeric value (one parsed
            ``scp_codes`` entry).
        statements: Optional DataFrame indexed by SCP code; only its index is
            consulted. When omitted, the module-level ``agg_df`` table is
            used, so the one-argument form used with ``Series.apply`` keeps
            working.

    Returns:
        The code with the largest value among the codes found in the table's
        index, or the string ``"None"`` when no code of ``y_dic`` is in the
        table. On ties the first code seen wins.
    """
    table = agg_df if statements is None else statements
    best_class = "None"
    best_value = -float("inf")
    for key, value in y_dic.items():
        # Only codes listed in the table are candidates; strict ">" keeps
        # the first code on ties.
        if key in table.index and value > best_value:
            best_class, best_value = key, value
    return best_class
| |
def create_patches(ecg_signal, maximum_patches, patch_size, channels_use):
    """Crop a batch of signals and select channels for patching (NeuroRVQ).

    Args:
        ecg_signal: 3-D array-like; axis 1 is indexed with ``channels_use``
            and axis 2 (time) is truncated.
        maximum_patches: Total patch budget, shared across the selected
            channels.
        patch_size: Number of time samples per patch.
        channels_use: Sequence of indices into axis 1 to keep (must be
            non-empty).

    Returns:
        A tuple ``(ecg_signal_patches, n_time)`` where ``n_time`` is
        ``maximum_patches // len(channels_use)`` and ``ecg_signal_patches``
        is the input limited to the first ``n_time * patch_size`` time
        samples and to the channels in ``channels_use``.

    Raises:
        ValueError: If ``ecg_signal`` is not 3-dimensional.

    NOTE(review): ``n_time`` is derived from the patch budget only. If the
    signal has fewer than ``n_time * patch_size`` samples, the slice is
    shorter than ``n_time`` patches -- confirm callers never hit that case.
    """
    # The signal must be 3-D; fail with the same exception type that an
    # unpacking of ``shape`` into three names would produce.
    if len(ecg_signal.shape) != 3:
        raise ValueError(
            f"ecg_signal must be 3-dimensional, got shape {tuple(ecg_signal.shape)}"
        )
    # Per-channel share of the patch budget.
    n_time = maximum_patches // len(channels_use)
    ecg_signal = ecg_signal[:, :, :n_time * patch_size]
    ecg_signal_patches = ecg_signal[:, channels_use, :]
    return ecg_signal_patches, n_time
|
|
|
|
# Build the benchmarking dataset only when the cached array is missing.
# NOTE(review): the original indentation was lost; everything up to the final
# "Dataset is ready" message is assumed to belong to this guard.
# The names below intentionally stay at module level: the aggregate_* helpers
# read the module-level ``agg_df`` table.
if not os.path.exists("./example_files/ecg_sample/ptb_xl_cut_benchmarking.npy"):

    # ---- 1. Download PTB-XL -------------------------------------------
    print("Downloading ptb_xl....")
    # check=False is kept: the exit status of the recursive download is not
    # treated as fatal; the presence of the records is verified below.
    subprocess.run([
        "wget",
        "-r", "-N", "-c", "-np",
        "-P", "./ptb_xl",
        "https://physionet.org/files/ptb-xl/1.0.3/"
    ], check=False)

    # ---- 2. Flatten records500/<subfolder>/* into one directory --------
    source_root = "./ptb_xl/physionet.org/files/ptb-xl/1.0.3/records500"
    target_root = "./ptb_xl/records500_all"
    if not os.path.isdir(source_root):
        # Give a clear message instead of a bare os.listdir failure.
        raise FileNotFoundError(
            f"Expected downloaded records at {source_root}; "
            "the PTB-XL download appears to have failed."
        )
    os.makedirs(target_root, exist_ok=True)

    for subfolder in os.listdir(source_root):
        subfolder_path = os.path.join(source_root, subfolder)

        if os.path.isdir(subfolder_path):
            for filename in os.listdir(subfolder_path):
                src_file = os.path.join(subfolder_path, filename)
                dst_file = os.path.join(target_root, filename)

                # On a name clash, disambiguate with the subfolder name.
                if os.path.exists(dst_file):
                    name, ext = os.path.splitext(filename)
                    dst_file = os.path.join(target_root, f"{name}_{subfolder}{ext}")

                shutil.move(src_file, dst_file)

    print("Merge complete.")
    print("Dataset cutting....")

    # ---- 3. Load metadata and derive labels ----------------------------
    path = './ptb_xl/physionet.org/files/ptb-xl/1.0.3/'
    data_path = './ptb_xl/records500_all'

    Y = pd.read_csv(path + 'ptbxl_database.csv', index_col='ecg_id')
    # scp_codes is stored as the string form of a dict; parse it safely.
    Y.scp_codes = Y.scp_codes.apply(ast.literal_eval)

    # .copy() so the column assignments below modify an independent frame
    # rather than a slice of Y.
    new_Y = Y.loc[:, ['patient_id', 'scp_codes', 'strat_fold', 'filename_lr', 'filename_hr']].copy()
    # Assignment (the original used "==", which was a no-op comparison).
    new_Y.index.name = 'ecg_id'

    # Keep only the statements flagged as diagnostic.
    agg_df_all = pd.read_csv(path + 'scp_statements.csv', index_col=0)
    agg_df = agg_df_all[agg_df_all.diagnostic == 1]

    new_Y['diagnostic_5_classes'] = new_Y.scp_codes.apply(aggregate_diagnostic_one_class)
    new_Y['diagnostic_23_classes'] = new_Y.scp_codes.apply(aggregate_diagnostic_one_subclass)
    new_Y['diagnostic_44_classes'] = new_Y.scp_codes.apply(aggregate_diagnostic_all_classes)
    # Records were flattened into data_path, so keep only the base name.
    new_Y['filename_hr'] = new_Y['filename_hr'].str.split('/').str[-1]

    # Make sure the output directory exists before writing anything to it.
    os.makedirs('./example_files/ecg_sample', exist_ok=True)
    new_Y.to_csv('./example_files/ecg_sample/ptb_xl_cut_benchmarking.csv')

    # ---- 4. Read every record into one (records, 12, 5000) array -------
    X = np.zeros((len(new_Y['filename_hr']), 12, 5000))

    ref_ch_names = None
    for idx, f_i in enumerate(tqdm.tqdm(new_Y['filename_hr'])):
        # Read each record once; rdsamp returns (signals, fields).
        signal, fields = wfdb.rdsamp(os.path.join(data_path, f_i))
        x = signal.T
        if x.shape != (12, 5000):
            raise ValueError(f"Signal {f_i} has shape {x.shape}, expected (12, 5000)")
        ch_names = np.array([e.lower() for e in fields['sig_name']])
        if idx == 0:
            # The first record defines the reference channel order.
            ref_ch_names = ch_names
            X[idx, :, :] = x
        else:
            try:
                # Position of every reference channel within this record.
                reorder_idx = [np.where(ch_names == ch)[0][0] for ch in ref_ch_names]
            except IndexError as err:
                raise ValueError(f"Channel names in {f_i} do not match reference channels.") from err
            x_reor = x[reorder_idx, :]
            X[idx, :, :] = x_reor

    print(X.shape)
    np.save('./example_files/ecg_sample/ptb_xl_cut_benchmarking.npy', X)
    print("Dataset is ready")
|
|