Alhdrawi committed on
Commit
d275aec
·
verified ·
1 Parent(s): b534b11

Upload run_preprocess.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. run_preprocess.py +41 -0
run_preprocess.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+ from data_process import get_cxr_paths_list, img_to_hdf5, get_cxr_path_csv, write_report_csv
4
+
5
+
6
def parse_args(argv=None):
    """Parse the command-line options of the pre-processing script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behaviour; passing a list allows the parser to be
            driven programmatically.

    Returns:
        argparse.Namespace: namespace with the attributes ``csv_out_path``,
        ``cxr_out_path``, ``dataset_type``, ``mimic_impressions_path``,
        ``chest_x_ray_path`` and ``radiology_reports_path``.

    Raises:
        SystemExit: raised by argparse on unknown options or when
            ``--dataset_type`` is not one of the declared choices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--csv_out_path', type=str, default='data/cxr_paths.csv', help="Directory to save paths to all chest x-ray images in dataset.")
    parser.add_argument('--cxr_out_path', type=str, default='data/cxr.h5', help="Directory to save processed chest x-ray image data.")
    parser.add_argument('--dataset_type', type=str, default='mimic', choices=['mimic', 'chexpert-test'], help="Type of dataset to pre-process")
    parser.add_argument('--mimic_impressions_path', default='data/mimic_impressions.csv', help="Directory to save extracted impressions from radiology reports.")
    parser.add_argument('--chest_x_ray_path', default='/deep/group/data/mimic-cxr/mimic-cxr-jpg/2.0.0/files', help="Directory where chest x-ray image data is stored. This should point to the files folder from the MIMIC chest x-ray dataset.")
    parser.add_argument('--radiology_reports_path', default='/deep/group/data/med-data/files/', help="Directory radiology reports are stored. This should point to the files folder from the MIMIC radiology reports dataset.")
    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
16
+
17
if __name__ == "__main__":
    # Script entry point: parse the CLI options, then run the pipeline that
    # matches the selected --dataset_type.
    args = parse_args()
    if args.dataset_type == "mimic":
        # Write Chest X-ray Image HDF5 File
        get_cxr_path_csv(args.csv_out_path, args.chest_x_ray_path)
        cxr_paths = get_cxr_paths_list(args.csv_out_path)
        img_to_hdf5(cxr_paths, args.cxr_out_path)

        # Write CSV File Containing Impressions for each Chest X-ray
        write_report_csv(cxr_paths, args.radiology_reports_path, args.mimic_impressions_path)
    elif args.dataset_type == "chexpert-test":
        # Get all test paths based on cxr dir
        cxr_dir = Path(args.chest_x_ray_path)
        # Keep only the first frontal views ("view1" anywhere in the path),
        # sorted so the order aligns with the groundtruth.
        cxr_paths = sorted(
            path for path in cxr_dir.rglob("*.jpg") if "view1" in str(path)
        )

        # The script expects exactly this many images. Raise explicitly
        # rather than assert, so the check survives `python -O` and the
        # failure says what was actually found.
        expected_count = 500
        if len(cxr_paths) != expected_count:
            raise ValueError(
                f"Expected {expected_count} 'view1' .jpg images under "
                f"{cxr_dir}, found {len(cxr_paths)}."
            )

        img_to_hdf5(cxr_paths, args.cxr_out_path)
36
+
37
+
38
+
39
+
40
+
41
+