|
|
import argparse |
|
|
from pathlib import Path |
|
|
from data_process import get_cxr_paths_list, img_to_hdf5, get_cxr_path_csv, write_report_csv |
|
|
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the chest x-ray pre-processing script.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            callers that invoke ``parse_args()`` behave exactly as before.

    Returns:
        argparse.Namespace: Namespace with the attributes ``csv_out_path``,
        ``cxr_out_path``, ``dataset_type``, ``mimic_impressions_path``,
        ``chest_x_ray_path`` and ``radiology_reports_path``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments (for example a
            ``--dataset_type`` outside the allowed choices) or on ``--help``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--csv_out_path',
        type=str,
        default='data/cxr_paths.csv',
        help="Path of the CSV file in which to save the paths to all chest x-ray images in the dataset.",
    )
    parser.add_argument(
        '--cxr_out_path',
        type=str,
        default='data/cxr.h5',
        help="Path of the HDF5 file in which to save processed chest x-ray image data.",
    )
    parser.add_argument(
        '--dataset_type',
        type=str,
        default='mimic',
        choices=['mimic', 'chexpert-test'],
        help="Type of dataset to pre-process",
    )
    parser.add_argument(
        '--mimic_impressions_path',
        type=str,
        default='data/mimic_impressions.csv',
        help="Path of the CSV file in which to save extracted impressions from radiology reports.",
    )
    parser.add_argument(
        '--chest_x_ray_path',
        type=str,
        default='/deep/group/data/mimic-cxr/mimic-cxr-jpg/2.0.0/files',
        help="Directory where chest x-ray image data is stored. This should point to the files folder from the MIMIC chest x-ray dataset.",
    )
    parser.add_argument(
        '--radiology_reports_path',
        type=str,
        default='/deep/group/data/med-data/files/',
        help="Directory where radiology reports are stored. This should point to the files folder from the MIMIC radiology reports dataset.",
    )
    # Forwarding argv lets callers and tests supply arguments explicitly;
    # None keeps the original behaviour of reading sys.argv.
    return parser.parse_args(argv)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: pre-process either the MIMIC chest x-ray dataset or
    # the CheXpert test set, depending on --dataset_type.
    args = parse_args()

    if args.dataset_type == "mimic":
        # First produce the CSV at csv_out_path from the image folder, then
        # read the image paths back from that CSV.
        # NOTE(review): exact CSV contents are defined in data_process — confirm there.
        get_cxr_path_csv(args.csv_out_path, args.chest_x_ray_path)
        cxr_paths = get_cxr_paths_list(args.csv_out_path)

        # Convert the listed images into the output file at cxr_out_path.
        img_to_hdf5(cxr_paths, args.cxr_out_path)

        # Write the report CSV for the same images to mimic_impressions_path.
        write_report_csv(cxr_paths, args.radiology_reports_path, args.mimic_impressions_path)

    elif args.dataset_type == "chexpert-test":
        # Number of "view1" images this script requires for the CheXpert test set.
        expected_image_count = 500

        # Recursively collect every .jpg whose path contains "view1", in
        # sorted order so the output ordering is deterministic.
        cxr_dir = Path(args.chest_x_ray_path)
        cxr_paths = sorted(
            path for path in cxr_dir.rglob("*.jpg") if "view1" in str(path)
        )

        # Explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if len(cxr_paths) != expected_image_count:
            raise RuntimeError(
                f"Expected {expected_image_count} 'view1' .jpg images under "
                f"{cxr_dir}, but found {len(cxr_paths)}."
            )

        img_to_hdf5(cxr_paths, args.cxr_out_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|