File size: 5,603 Bytes
c642393 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | from natsort import natsorted
import numpy as np
from pathlib import Path
import os
from os.path import join
from nnunet.dataset_conversion.utils import generate_dataset_json
import SimpleITK as sitk
import gc
import multiprocessing as mp
from functools import partial
def preprocess_dataset(ribfrac_load_path, ribseg_load_path, dataset_save_path, pool, max_train_id=500):
    """Convert the RibSeg masks and their RibFrac images into the nnU-Net folder layout.

    Creates ``imagesTr``, ``labelsTr``, ``imagesTs`` and ``labelsTs`` below
    ``dataset_save_path``, lists every file in ``<ribseg_load_path>/labelsTr``
    and hands each one to :func:`preprocess_single` through ``pool.map``
    (which blocks until every file has been processed).

    Args:
        ribfrac_load_path: Root folder of the RibFrac images. It must contain
            the ``imagesTr`` / ``imagesTs`` sub-folders that
            :func:`preprocess_single` reads from.
        ribseg_load_path: Root folder of the RibSeg download; the masks are
            read from its ``labelsTr`` sub-folder.
        dataset_save_path: Root folder the converted dataset is written to.
        pool: Object exposing ``map(func, iterable)``; the script passes a
            ``multiprocessing.Pool``.
        max_train_id: Highest case id that is still written to the training
            split; larger ids go to the test split.
    """
    mask_load_path = join(ribseg_load_path, "labelsTr")

    for subfolder in ("imagesTr", "labelsTr", "imagesTs", "labelsTs"):
        Path(join(dataset_save_path, subfolder)).mkdir(parents=True, exist_ok=True)

    mask_filenames = load_filenames(mask_load_path)

    # The output path is bound explicitly so workers do not depend on a module
    # global that only exists when this file is run as a script and the pool
    # was forked after the global had been assigned.
    worker = partial(
        preprocess_single,
        image_load_path=ribfrac_load_path,
        dataset_save_path=dataset_save_path,
        max_train_id=max_train_id,
    )
    pool.map(worker, mask_filenames)


def preprocess_single(filename, image_load_path, dataset_save_path=None, max_train_id=500):
    """Convert one RibSeg mask and the matching RibFrac image.

    The case id is parsed from the mask's file name, which is expected to look
    like ``RibFrac<id>-...``. The image ``RibFrac<id>-image.nii.gz`` is loaded
    from ``<image_load_path>/imagesTr`` (or ``imagesTs`` when the id exceeds
    ``max_train_id``), and both volumes are written as ``RibSeg_<id:04d>``
    files into the corresponding image/label folders of ``dataset_save_path``.
    The image is saved with the spacing read from the mask.

    Args:
        filename: Path of the mask file. Files whose name contains
            ``-cl.nii.gz`` are skipped (presumably centerline files - confirm
            against the RibSeg release).
        image_load_path: Root folder of the RibFrac images.
        dataset_save_path: Root folder of the converted dataset. When omitted,
            the module-level ``dataset_save_path`` is used, which keeps the
            old two-argument call working.
        max_train_id: Highest case id assigned to the training split.

    Raises:
        NameError: If ``dataset_save_path`` is neither passed nor defined as a
            module-level variable.
    """
    name = os.path.basename(filename)
    if "-cl.nii.gz" in name:
        return

    if dataset_save_path is None:
        # Backward compatibility with callers that relied on the script global.
        try:
            dataset_save_path = globals()["dataset_save_path"]
        except KeyError:
            raise NameError("name 'dataset_save_path' is not defined") from None

    # "RibFrac123-rib-seg.nii.gz" -> "RibFrac123" -> 123
    case_id = int(name.split("-")[0][len("RibFrac"):])

    if case_id > max_train_id:
        image_set, mask_set = "imagesTs", "labelsTs"
    else:
        image_set, mask_set = "imagesTr", "labelsTr"

    image, _, _, _ = load_image(
        join(image_load_path, image_set, "RibFrac{}-image.nii.gz".format(case_id)),
        return_meta=True,
        is_seg=False,
    )
    mask, spacing, _, _ = load_image(filename, return_meta=True, is_seg=True)

    case_name = "RibSeg_" + str(case_id).zfill(4)
    # Both volumes are written with the spacing of the mask.
    save_image(join(dataset_save_path, image_set, case_name + "_0000.nii.gz"), image, spacing=spacing, is_seg=False)
    save_image(join(dataset_save_path, mask_set, case_name + ".nii.gz"), mask, spacing=spacing, is_seg=True)
def load_filenames(img_dir, extensions=None):
    """Return the naturally sorted paths of the entries in ``img_dir``.

    Args:
        img_dir: Directory to list. A trailing ``/`` is appended if missing.
        extensions: Optional suffix (or tuple of suffixes) handed to
            ``str.endswith``; entries that do not match are dropped. ``None``
            keeps every entry.

    Returns:
        The result of ``natsorted`` applied to the collected paths, each path
        being the directory prefix concatenated with the entry name.
    """
    directory = fix_path(img_dir)
    matches = [
        directory + entry
        for entry in os.listdir(directory)
        if extensions is None or entry.endswith(extensions)
    ]
    return natsorted(np.asarray(matches))
def fix_path(path):
    """Return ``path`` terminated by exactly one appended ``/`` if it had none.

    A path that already ends in ``/`` is returned unchanged.
    """
    return path if path[-1] == "/" else path + "/"
def load_image(filepath, return_meta=False, is_seg=False):
    """Read an image file with SimpleITK and return it as a NumPy array.

    Args:
        filepath: Path of the image to read.
        return_meta: When true, also return spacing, affine and header.
        is_seg: When true, the voxel values are rounded and cast to ``int8``.

    Returns:
        The array alone if ``return_meta`` is false, otherwise the tuple
        ``(array, spacing, affine, header)`` where ``affine`` is always
        ``None`` and ``header`` maps every metadata key of the image to its
        value.
    """
    sitk_image = sitk.ReadImage(filepath)
    array = sitk.GetArrayFromImage(sitk_image)

    if is_seg:
        # int8 rather than uint8: segmentations may carry negative labels.
        array = np.rint(array).astype(np.int8)

    if not return_meta:
        return array

    spacing = sitk_image.GetSpacing()
    header = {}
    for key in sitk_image.GetMetaDataKeys():
        header[key] = sitk_image.GetMetaData(key)
    # The affine transform is not extracted; the slot is kept for API symmetry.
    return array, spacing, None, header
def save_image(filename, image, spacing=None, affine=None, header=None, is_seg=False, mp_pool=None, free_mem=False):
    """Write a NumPy array to ``filename`` through SimpleITK.

    Args:
        filename: Destination path.
        image: Array to save.
        spacing: Optional spacing set on the SimpleITK image before writing.
        affine: Accepted but ignored; no affine is applied.
        header: Optional mapping whose entries are copied into the image
            metadata.
        is_seg: When true, the array is rounded and cast to ``int8`` first.
        mp_pool: Optional pool; when given, the write is submitted with
            ``apply_async`` instead of being executed in the calling process.
        free_mem: When true, the local image reference is dropped and a
            garbage collection is triggered after the write was issued.
    """
    if is_seg:
        # int8 rather than uint8: segmentations may carry negative labels.
        image = np.rint(image).astype(np.int8)

    image = sitk.GetImageFromArray(image)

    if header is not None:
        for key in header.keys():
            image.SetMetaData(key, header[key])

    if spacing is not None:
        image.SetSpacing(spacing)

    # `affine` is intentionally not applied: nothing is done with it here.

    if mp_pool is not None:
        mp_pool.apply_async(_save, args=(filename, image, free_mem,))
    else:
        sitk.WriteImage(image, filename)

    if free_mem:
        del image
        gc.collect()
def _save(filename, image, free_mem):
    """Write a SimpleITK image to ``filename``; used as the async pool task.

    When ``free_mem`` is true, the local reference is dropped and a garbage
    collection is triggered after writing.
    """
    sitk.WriteImage(image, filename)
    if not free_mem:
        return
    del image
    gc.collect()
if __name__ == "__main__":
    # Note: Due to a bug in SimpleITK 2.1.x a version of SimpleITK < 2.1.0 is required for loading images.
    # Further, the images and masks cannot simply be copied: both are loaded and re-saved so that they
    # end up with the same spacing.

    # Conversion instructions:
    # 1. All images from both training and validation set of the RibFrac dataset need to be downloaded
    #    from https://ribfrac.grand-challenge.org/dataset/ into a new folder named RibFrac
    # 2. The RibSeg masks need to be downloaded from https://zenodo.org/record/5336592 into a new folder named RibSeg
    # 3. Follow the unpacking instructions for the RibFrac dataset as in Task154_RibFrac
    # 4. Unzip RibSeg_490_nii.zip from the RibSeg dataset and rename the folder to labelsTr

    # These names are module-level globals and must be assigned before the
    # pool is created, so that forked workers can see them.
    ribfrac_load_path = "/home/k539i/Documents/datasets/original/RibFrac/"
    ribseg_load_path = "/home/k539i/Documents/datasets/original/RibSeg/"
    dataset_save_path = "/home/k539i/Documents/datasets/preprocessed/Task156_RibSeg/"
    # NOTE(review): not referenced elsewhere in this file; the train/test
    # cut-off applied by preprocess_single is 500 as well.
    max_imagesTr_id = 500

    pool = mp.Pool(processes=20)
    try:
        preprocess_dataset(ribfrac_load_path, ribseg_load_path, dataset_save_path, pool)
        print("Still saving images in background...")
    finally:
        # Always shut the workers down, even when preprocessing raised.
        pool.close()
        pool.join()
    print("All tasks finished.")

    # Only reached when preprocessing succeeded.
    generate_dataset_json(join(dataset_save_path, 'dataset.json'), join(dataset_save_path, "imagesTr"), None, ('CT',), {0: 'bg', 1: 'rib'}, "Task156_RibSeg")
|