|
|
from typing import Tuple |
|
|
|
|
|
from batchgenerators.utilities.file_and_folder_operations import save_json, join |
|
|
|
|
|
|
|
|
def generate_dataset_json(output_folder: str,
                          channel_names: dict,
                          labels: dict,
                          num_training_cases: int,
                          file_ending: str,
                          regions_class_order: Tuple[int, ...] = None,
                          dataset_name: str = None, reference: str = None, release: str = None, license: str = None,
                          description: str = None,
                          overwrite_image_reader_writer: str = None, **kwargs):
    """
    Generates a dataset.json file in the output folder.

    channel_names:
        Channel names must map the index to the name of the channel, example:
        {
            0: 'T1',
            1: 'CT'
        }
        Note that the channel names may influence the normalization scheme!! Learn more in the documentation.

    labels:
        This will tell nnU-Net what labels to expect. Important: This will also determine whether you use
        region-based training or not.
        Example regular labels:
        {
            'background': 0,
            'left atrium': 1,
            'some other label': 2
        }
        Example region-based training:
        {
            'background': 0,
            'whole tumor': (1, 2, 3),
            'tumor core': (2, 3),
            'enhancing tumor': 3
        }
        Remember that nnU-Net expects consecutive values for labels! nnU-Net also expects 0 to be background!

    num_training_cases: is used to double check all cases are there!

    file_ending: needed for finding the files correctly. IMPORTANT! File endings must match between images and
    segmentations!

    regions_class_order: required whenever any label value is a tuple/list of more than one label id
    (region-based training); defines the order in which regions are converted back into a label map.

    dataset_name, reference, release, license, description: self-explanatory and not used by nnU-Net. Just for
    completeness and as a reminder that these would be great!
    (The parameter name 'license' shadows the builtin, but is kept for backward compatibility with callers.)

    overwrite_image_reader_writer: If you need a special IO class for your dataset you can derive it from
    BaseReaderWriter, place it into nnunet.imageio and reference it here by name

    kwargs: whatever you put here will be placed in the dataset.json as well
    """
    # Region-based training is signalled by any label value being a tuple/list of more than one label id.
    has_regions: bool = any(isinstance(i, (tuple, list)) and len(i) > 1 for i in labels.values())
    if has_regions and regions_class_order is None:
        # Explicit raise instead of a bare `assert` so the check is not stripped under `python -O`.
        # AssertionError is raised on purpose: existing callers that catch it keep working.
        raise AssertionError("You have defined regions but regions_class_order is not set. You need that.")

    # JSON object keys must be strings. Build a new dict instead of mutating the
    # caller's argument in place (the previous implementation re-keyed and
    # deleted entries from the dict the caller passed in).
    channel_names = {str(k): v for k, v in channel_names.items()}

    # Coerce label values to plain ints (e.g. numpy integer scalars are not JSON-serializable).
    # Again, work on a copy rather than mutating the caller's dict.
    labels = {name: tuple(int(i) for i in value) if isinstance(value, (tuple, list)) else int(value)
              for name, value in labels.items()}

    # Mandatory fields of the dataset.json schema.
    dataset_json = {
        'channel_names': channel_names,
        'labels': labels,
        'numTraining': num_training_cases,
        'file_ending': file_ending,
    }

    # Optional metadata: only written when provided so the json stays minimal.
    if dataset_name is not None:
        dataset_json['name'] = dataset_name
    if reference is not None:
        dataset_json['reference'] = reference
    if release is not None:
        dataset_json['release'] = release
    if license is not None:
        # NOTE(review): the key is spelled 'licence' (British spelling) — presumably downstream
        # readers expect exactly this key, so it is deliberately left as-is.
        dataset_json['licence'] = license
    if description is not None:
        dataset_json['description'] = description
    if overwrite_image_reader_writer is not None:
        dataset_json['overwrite_image_reader_writer'] = overwrite_image_reader_writer
    if regions_class_order is not None:
        dataset_json['regions_class_order'] = regions_class_order

    # Arbitrary extra keys are passed straight through into dataset.json.
    dataset_json.update(kwargs)

    save_json(dataset_json, join(output_folder, 'dataset.json'), sort_keys=False)
|
|
|