# Hugging Face upload metadata: uploaded by FelixzeroSun
# ("Upload folder using huggingface_hub"), commit 19c1f58 (verified).
# Copyright 2021 HIP Applied Computer Vision Lab, Division of Medical Image Computing, German Cancer Research Center
# (DKFZ), Heidelberg, Germany
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path
from functools import lru_cache
from typing import Union
from batchgenerators.utilities.file_and_folder_operations import *
import numpy as np
import re
from nnunetv2.paths import nnUNet_raw
def get_identifiers_from_splitted_dataset_folder(folder: str, file_ending: str):
    """
    Collect the unique case identifiers found in ``folder``.

    Every file name is expected to look like ``<identifier>_XXXX<file_ending>``
    where ``XXXX`` is a 4 digit channel index. The identifier is obtained by
    cutting the channel suffix and the file ending off the end of the name; the
    name is not validated against that pattern.

    :param folder: directory that is searched via ``subfiles``
    :param file_ending: suffix the files must have (for example ``.nii.gz``)
    :return: result of ``np.unique`` over the stripped file names
    """
    # "_XXXX" contributes 5 characters on top of the file ending
    suffix_length = len(file_ending) + 5
    names = subfiles(folder, suffix=file_ending, join=False)
    stems = [name[:-suffix_length] for name in names]
    # np.unique collapses the per-channel duplicates of each case
    return np.unique(stems)
def create_lists_from_splitted_dataset_folder(folder: str, file_ending: str, identifiers: List[str] = None) -> List[
    List[str]]:
    """
    Build, for every identifier, the list of its channel files in ``folder``.

    Does not rely on dataset.json.

    A file belongs to an identifier if its name is exactly
    ``<identifier>_XXXX<file_ending>`` with ``XXXX`` being four digits. Files
    that do not follow this naming scheme are ignored.

    :param folder: directory containing the image files
    :param file_ending: suffix of the image files (for example ``.nii.gz``)
    :param identifiers: case identifiers to look up. If None they are derived
        from the folder content via get_identifiers_from_splitted_dataset_folder
    :return: one list per identifier (same order as ``identifiers``) holding the
        matching files joined with ``folder``, in the order ``subfiles`` returned
        them. Identifiers without any matching file yield an empty list.
    """
    if identifiers is None:
        identifiers = get_identifiers_from_splitted_dataset_folder(folder, file_ending)
    files = subfiles(folder, suffix=file_ending, join=False, sort=True)

    # One pattern for all files instead of one compiled pattern per identifier:
    # "_XXXX" + file_ending has a fixed length, so group(1) is exactly the
    # identifier part of the name. DOTALL keeps '.' from excluding any character.
    channel_file = re.compile(r"(.*)_\d\d\d\d" + re.escape(file_ending), re.DOTALL)

    # Single pass over the directory listing; avoids rescanning every file for
    # every identifier (which is quadratic for large datasets).
    files_by_identifier = {}
    for filename in files:
        match = channel_file.fullmatch(filename)
        if match is not None:
            files_by_identifier.setdefault(match.group(1), []).append(join(folder, filename))

    # Copy each group so that repeated identifiers do not share one list object
    return [list(files_by_identifier.get(identifier, ())) for identifier in identifiers]
# return [files] #arthur, fix for nnUNet Imene8
def get_filenames_of_train_images_and_targets(raw_dataset_folder: str, dataset_json: dict = None):
    """
    Return a mapping ``identifier -> {'images': [...], 'label': ...}`` for the
    training cases of a raw dataset.

    If ``dataset_json`` has a 'dataset' entry, that entry is used: relative
    'label' and 'images' paths are resolved against ``raw_dataset_folder`` and
    made absolute, absolute paths are kept as they are. The entry is updated
    in place and the very same dict is returned.

    Otherwise the cases are discovered from the 'imagesTr' folder using
    ``dataset_json['file_ending']`` and the labels are expected in 'labelsTr'
    under ``<identifier><file_ending>``.

    :param raw_dataset_folder: folder of the raw dataset
    :param dataset_json: already loaded dataset.json content. If None,
        'dataset.json' is loaded from ``raw_dataset_folder``
    :return: dict describing images and label of each training case
    """
    if dataset_json is None:
        dataset_json = load_json(join(raw_dataset_folder, 'dataset.json'))

    if 'dataset' not in dataset_json.keys():
        # no explicit listing -> derive everything from the folder structure
        images_dir = join(raw_dataset_folder, 'imagesTr')
        ending = dataset_json['file_ending']
        case_ids = get_identifiers_from_splitted_dataset_folder(images_dir, ending)
        image_lists = create_lists_from_splitted_dataset_folder(images_dir, ending, case_ids)
        label_files = [join(raw_dataset_folder, 'labelsTr', case_id + ending) for case_id in case_ids]
        return {
            case_id: {'images': case_images, 'label': case_label}
            for case_id, case_images, case_label in zip(case_ids, image_lists, label_files)
        }

    def _resolve(path):
        # absolute paths pass through, relative ones are anchored at the dataset folder
        if os.path.isabs(path):
            return path
        return os.path.abspath(join(raw_dataset_folder, path))

    dataset = dataset_json['dataset']
    for entry in dataset.values():
        entry['label'] = _resolve(entry['label'])
        entry['images'] = [_resolve(image) for image in entry['images']]
    return dataset
if __name__ == '__main__':
    # Manual smoke run: print the image/label mapping of the
    # 'Dataset002_Heart' folder located under nnUNet_raw.
    example_dataset_folder = join(nnUNet_raw, 'Dataset002_Heart')
    print(get_filenames_of_train_images_and_targets(example_dataset_folder))