|
|
| '''
|
| write by ygq
|
| create on 2025-08-03
|
| update BRATS_2020
|
|
|
| BRATS2020 是 BRATS 系列的一个重要里程碑。它在 BRATS2019 的基础上,通过显著扩大数据规模、增加数据多样性(尤其是纳入中国数据)、
|
| 完善生存预测任务的评估流程(验证集和测试集包含生存信息)以及引入额外未标注数据以促进新学习范式,为脑胶质瘤多模态 MRI 分割和生存预测研究设定了更高的标准。
|
|
|
| 数据内容与规模(显著扩大):
|
| 训练集: 包含 369 例 患者的完整多模态 MRI 扫描数据及其对应的专家手动分割标注(Ground Truth)。(相比2019的335例增加)
|
| 验证集: 包含 125 例 患者的完整多模态 MRI 扫描数据。没有提供标注。用于开发阶段在线评估算法性能。
|
| 测试集: 包含 166 例 患者的完整多模态 MRI 扫描数据。没有提供标注。这是最终排名使用的独立测试集。(与2019测试集规模相同,但内容不同)
|
|
|
| 关键特性 - 多模态 MRI(与2019一致):
|
| 每个病例仍然包含四种预处理后的 3D MRI 序列:
|
| Native (T1)
|
| Post-contrast T1-weighted (T1Gd/T1ce)
|
| T2-weighted (T2)
|
| T2 Fluid Attenuated Inversion Recovery (T2-FLAIR)
|
|
|
| 关键特性 - 肿瘤标注(与2019一致):
|
|
|
| 训练集提供专家手动勾画的精细标注。
|
| 标注定义相同的三个子区域:
|
| 坏死和非增强肿瘤核心: 标签值 = 1
|
| 瘤周水肿: 标签值 = 2
|
| 增强肿瘤: 标签值 = 4
|
| 整个肿瘤区域: 标签值 1+2+4
|
| 肿瘤核心区域: 标签值 1+4
|
|
|
|
|
| 根据沟通参考MSD中的BRATS的结构:
|
| 1.将多个分开的模态合并,构建第四个维度的数组,分别按照FLAIR,T1,T1CE,T2顺序存放;
|
| 2.生存期信息也需要相应补充到HGG的数据集中
|
|
|
| Training:
|
| meta_info:[保留Grade,BraTS_2020_subject_ID]--name_mapping.csv
|
| Grade,BraTS_2017_subject_ID,BraTS_2018_subject_ID,TCGA_TCIA_subject_ID,BraTS_2019_subject_ID,BraTS_2020_subject_ID
|
| HGG,Brats17_CBICA_AAB_1,Brats18_CBICA_AAB_1,NA,BraTS19_CBICA_AAB_1,BraTS20_Training_001
|
| HGG,Brats17_CBICA_AAG_1,Brats18_CBICA_AAG_1,NA,BraTS19_CBICA_AAG_1,BraTS20_Training_002
|
|
|
| survival_info:--survival_info.csv
|
| Brats20ID,Age,Survival_days,Extent_of_Resection
|
| BraTS20_Training_001,60.463,289,GTR
|
| BraTS20_Training_002,52.263,616,GTR
|
| Validation:
|
| meta_info:[保留BraTS_2020_subject_ID;验证集无Grade列]--name_mapping_validation_data.csv
|
| BraTS_2017_subject_ID,BraTS_2018_subject_ID,TCGA_TCIA_subject_ID,BraTS_2019_subject_ID,BraTS_2020_subject_ID
|
| Brats17_CBICA_AAM_1,Brats18_CBICA_AAM_1,NA,BraTS19_CBICA_AAM_1,BraTS20_Validation_001
|
| Brats17_CBICA_ABT_1,Brats18_CBICA_ABT_1,NA,BraTS19_CBICA_ABT_1,BraTS20_Validation_002
|
|
|
| survival_info:--survival_evaluation.csv
|
| BraTS20ID,Age,ResectionStatus
|
| BraTS20_Validation_001,68.170,GTR
|
| BraTS20_Validation_002,50.153,GTR
|
|
|
| '''
|
| import os
|
| import glob
|
| import pandas as pd
|
| import SimpleITK as sitk
|
| import argparse
|
| import json
|
| from tqdm import tqdm
|
| from util import meta_data
|
| import util
|
| import numpy as np
|
|
|
|
|
| import shutil
|
|
|
|
|
|
|
# CSV column names used to look up each case, keyed by dataset split.
#   meta_id_name       - case-ID column of the name-mapping CSV
#   meta_grade_name    - tumour-grade column of the name-mapping CSV
#   survival_id_name   - case-ID column of the survival CSV
#   meta_age_name      - age column of the survival CSV
#   meta_survival_name - survival-days column of the survival CSV
#   meta_status_name   - resection-status column of the survival CSV
# The survival ID column is spelled differently in the two splits
# ('Brats20ID' vs 'BraTS20ID'), matching the CSV headers quoted in the module
# docstring. The validation split defines no grade or survival-days column.
meta_info_dict={
    "training":{
        'meta_id_name':'BraTS_2020_subject_ID',
        'meta_grade_name':'Grade',
        'survival_id_name':'Brats20ID',
        'meta_age_name':'Age',
        'meta_survival_name':'Survival_days',
        'meta_status_name':'Extent_of_Resection'
    },
    'validation':{
        'meta_id_name':'BraTS_2020_subject_ID',
        'survival_id_name':'BraTS20ID',
        'meta_age_name':'Age',
        'meta_status_name':'ResectionStatus'
    }
}
|
|
|
|
|
|
|
# Value written under the 'Task' metadata key; also used as the key that
# wraps the label paths stored under 'Label_path'.
TASK_VALUE="segmentation"
# NOTE(review): the next three settings are not referenced anywhere in the
# visible part of this file - confirm whether they are still needed.
CLAMP_RANGE_CT = [-300,300]
CLAMP_RANGE_MRI = None
TARGET_VOXEL_SPACING=None

# Modality names recorded in the 'Sub_modality' metadata entry.
# Index-aligned with SERIES_ORDER below.
SUB_MODALITY=["FLAIR","T1w","t1gd","T2w"]

# File-name suffixes of the per-modality volumes ("<case>_<suffix>.nii"),
# in the order the volumes are handed to the series reader.
SERIES_ORDER=["flair","t1","t1ce","t2"]

# Segmentation label value -> label name, stored as 'Label_Dict' for training
# cases. The label values are 0, 1, 2 and 4 (there is no "3").
LABEL_DICT={
    "0":"backgroud",
    "1":"non-enhancing tumor",
    "2":"edema",
    "4":"enhancing tumour"
}
|
|
|
|
|
|
|
|
|
|
|
def find_metadata_files(path):
    """Return the CSV files located directly inside ``path``.

    Only the top level of ``path`` is searched; sub-directories are not
    descended into.

    Args:
        path: Directory to search.

    Returns:
        Sorted list of matching file paths (each one prefixed with ``path``).
        The list is empty when nothing matches or ``path`` does not exist.
    """
    # Escape glob metacharacters so that a directory such as "set[1]" is
    # matched literally instead of being parsed as a character class.
    search_pattern = os.path.join(glob.escape(path), '*.csv')
    # The pattern contains no "**", so recursive=True never had any effect and
    # is dropped. Sorting removes the dependency on filesystem listing order.
    return sorted(glob.glob(search_pattern))
|
|
|
def find_image_dirs(path):
    """List the names of every entry found directly under ``path``.

    The names come straight from ``os.listdir``: plain files are included
    alongside directories and no ordering is applied, so callers have to
    filter for directories themselves.
    """
    entry_names = os.listdir(path)
    return entry_names
|
|
|
|
|
def load_dicom_images(folder_path):
    """Read the DICOM series stored in ``folder_path``.

    Returns:
        A ``(file_names, image)`` tuple: the file names GDCM reports for the
        folder, and the image produced by reading those files as one series.
    """
    series_reader = sitk.ImageSeriesReader()
    series_file_names = series_reader.GetGDCMSeriesFileNames(folder_path)
    series_reader.SetFileNames(series_file_names)
    return series_file_names, series_reader.Execute()
|
|
|
|
|
def load_dicom_tag(imgs):
    """Read a single image file with ``sitk.ImageFileReader``.

    Args:
        imgs: Path of the file to read (a single file name, despite the
            plural parameter name).

    Returns:
        The object returned by the reader's ``Execute()`` call.
    """
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(imgs)
    # The header is read explicitly first, then the full image is loaded.
    file_reader.ReadImageInformation()
    return file_reader.Execute()
|
|
|
def load_nrrd(fp):
    """Read the image file at ``fp`` with ``sitk.ReadImage`` and return it."""
    loaded_image = sitk.ReadImage(fp)
    return loaded_image
|
|
|
|
|
def load_brtas_images(series_files):
    """Read the per-modality volumes of one case as a single image series.

    Each case consists of several 3D MRI scans (described by the dataset as
    already co-registered, resampled to 1 mm isotropic and skull-stripped).
    The files are handed to one ``sitk.ImageSeriesReader`` so that the
    separate modalities are combined along an extra dimension, in the order
    given (FLAIR, T1, T1CE, T2 when all four are present).

    Args:
        series_files: Ordered list of file paths to read.

    Returns:
        The image produced by the series reader.
    """
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(series_files)
    return series_reader.Execute()
|
|
|
def save_nifti(image, output_path, folder_path):
    """Write ``image`` to ``output_path``, creating the parent directory.

    Args:
        image: Image object to write; must provide ``SetMetaData``.
        output_path: Destination file path.
        folder_path: Source location, stored on the image under the
            "FolderPath" metadata key before writing.
    """
    output_dirpath = os.path.dirname(output_path)
    # dirname is '' for a bare file name; os.makedirs('') would raise, and the
    # current directory already exists, so there is nothing to create.
    if output_dirpath and not os.path.exists(output_dirpath):
        print(f"Creating directory {output_dirpath}")
        # exist_ok guards against another process creating it in between.
        os.makedirs(output_dirpath, exist_ok=True)

    image.SetMetaData("FolderPath", folder_path)
    sitk.WriteImage(image, output_path)
|
|
|
|
|
def convert_windows_to_linux_path(windows_path):
    """Turn a Windows-style path into a forward-slash path without a drive.

    Every backslash becomes ``/``. If the result contains a colon, everything
    up to and including the first colon is dropped (e.g. the ``C:`` drive
    prefix); later colons are kept.
    """
    slashed = windows_path.replace('\\', '/')
    _, colon, after_colon = slashed.partition(':')
    return after_colon if colon else slashed
|
|
|
def _read_csv_if_present(csv_path):
    """Load ``csv_path`` as a DataFrame, or return None when the file is absent."""
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path, sep=',')
    return None


def _select_case_rows(frame, id_column, case_id):
    """Return the rows of ``frame`` whose ``id_column`` equals ``case_id``.

    None is returned when ``frame`` is None (CSV missing) or no row matches.
    """
    if frame is None:
        return None
    rows = frame[frame[id_column] == case_id]
    return rows if rows.shape[0] > 0 else None


def _case_field(rows, column, default=''):
    """Return the first value of ``column`` in ``rows``, else ``default``."""
    if rows is None or column is None:
        return default
    return rows[column].iloc[0]


def main(target_path, output_dir):
    """Convert every BraTS case directory under ``target_path``.

    For each sub-directory of ``target_path`` the available modality volumes
    (``<case>_<suffix>.nii`` for each suffix in ``SERIES_ORDER``) are read as
    one series and written to ``<output_dir>/<case>/<case>.nii.gz``. For
    directories whose name contains "Training", the segmentation
    ``<case>_seg.nii`` is additionally copied to
    ``<output_dir>/<case>/segmentation/<case>.nii.gz`` when it exists.

    Two JSON files are maintained in ``output_dir``:

    * ``nifti_mappings.json`` - output image path -> metadata record. Entries
      already in the file are kept, and the file is rewritten after every
      converted case so partial progress survives a crash.
    * ``failed_files.json`` - list of case directories that raised an error.

    Per-case grade, age, survival and resection status are looked up in the
    name-mapping and survival CSVs using the column names in
    ``meta_info_dict``. A missing CSV, or a case absent from it, yields empty
    strings instead of an error.

    Args:
        target_path: Directory holding the case folders and the CSV files.
        output_dir: Directory that receives the converted files and JSON.
    """
    pid_dirs = find_image_dirs(target_path)
    failed_files = []

    os.makedirs(output_dir, exist_ok=True)
    json_output_path = os.path.join(output_dir, 'nifti_mappings.json')
    failed_files_path = os.path.join(output_dir, 'failed_files.json')

    # Load previous mappings once instead of re-parsing the file for every
    # case; create the file up front so it exists even if no case converts.
    if os.path.exists(json_output_path):
        with open(json_output_path, 'r', encoding='utf-8') as json_file:
            existing_mappings = json.load(json_file)
    else:
        existing_mappings = {}
        with open(json_output_path, 'w', encoding='utf-8') as json_file:
            json.dump(existing_mappings, json_file)

    meta_file = os.path.join(target_path, 'name_mapping.csv')
    survival_file = os.path.join(target_path, 'survival_info.csv')
    val_meta_file = os.path.join(target_path, 'name_mapping_validation_data.csv')
    val_survival_file = os.path.join(target_path, 'survival_evaluation.csv')

    # A frame is None when its CSV is missing; the lookups below then fall
    # back to defaults rather than touching an undefined DataFrame.
    split_sources = {
        'training': {
            'label': 'train',
            'meta_file': meta_file,
            'meta_frame': _read_csv_if_present(meta_file),
            'survival_frame': _read_csv_if_present(survival_file),
        },
        'validation': {
            'label': 'validation',
            'meta_file': val_meta_file,
            'meta_frame': _read_csv_if_present(val_meta_file),
            'survival_frame': _read_csv_if_present(val_survival_file),
        },
    }

    for data_dir in tqdm(pid_dirs, desc="Processing pid dirs"):
        full_path = os.path.join(target_path, data_dir)
        if not os.path.isdir(full_path):
            continue

        # Anything that is not a "Training" folder is handled as validation.
        tr_flag = 'Training' in data_dir
        split_key = 'training' if tr_flag else 'validation'
        columns = meta_info_dict[split_key]
        source = split_sources[split_key]

        meta = meta_data()

        info_rows = _select_case_rows(
            source['meta_frame'], columns['meta_id_name'], data_dir)
        survival_rows = _select_case_rows(
            source['survival_frame'], columns['survival_id_name'], data_dir)

        # The validation config has no grade / survival-days column, so
        # .get() yields None and those fields stay ''.
        meta_image_id = _case_field(
            info_rows, columns['meta_id_name'], default=data_dir)
        meta_image_grade = _case_field(info_rows, columns.get('meta_grade_name'))
        meta_image_age = _case_field(survival_rows, columns['meta_age_name'])
        meta_image_survival = _case_field(
            survival_rows, columns.get('meta_survival_name'))
        meta_image_status = _case_field(
            survival_rows, columns['meta_status_name'])

        CIA_other_info = {
            'metadata_file': source['meta_file'],
            'split': source['label'],
            'Image_id': meta_image_id,
            'Grade': meta_image_grade,
            'Age': str(meta_image_age),
            'Survival': str(meta_image_survival),
            'ResectionStatus': meta_image_status,
        }

        try:
            candidate_files = [
                os.path.join(full_path, "%s_%s.nii" % (data_dir, suffix))
                for suffix in SERIES_ORDER
            ]
            series_flag = [os.path.isfile(fp) for fp in candidate_files]
            series_files = [
                fp for fp, present in zip(candidate_files, series_flag) if present
            ]

            if not series_files:
                print("病例数据%s为空"%data_dir)
                continue

            sitk_img_original = load_brtas_images(series_files)
            original_size = list(sitk_img_original.GetSize())

            # Spacing is recorded as a fixed 1.0 rather than read from the image.
            meta.add_keyvalue('Spacing_mm', 1.0)
            meta.add_keyvalue('OriImg_path', ",".join(series_files))
            meta.add_keyvalue('Size', original_size)
            meta.add_keyvalue('Modality', "MRI")
            meta.add_keyvalue('Dataset_name', 'BRATS_2020')
            meta.add_keyvalue('ROI', 'head')

            # Keys are positions in SERIES_ORDER, not positions in the stacked
            # image, so they are non-contiguous when a modality file is missing.
            sub_modality_dict = {
                str(idx): SUB_MODALITY[idx]
                for idx, present in enumerate(series_flag)
                if present
            }
            meta.add_keyvalue('Sub_modality', sub_modality_dict)

            if tr_flag:
                meta.add_keyvalue('Label_Dict', LABEL_DICT)

            output_image_file = os.path.join(
                output_dir, data_dir, f"{data_dir}.nii.gz")
            save_nifti(sitk_img_original, output_image_file, full_path)
            print(f"Saved NIfTI file to {output_image_file}")

            if tr_flag:
                full_label_file = os.path.join(full_path, "%s_seg.nii" % (data_dir))
                process_label_path = os.path.join(
                    output_dir, data_dir, 'segmentation')
                processed_lbl_full_path = os.path.join(
                    process_label_path, f"{data_dir}.nii.gz")
                os.makedirs(process_label_path, exist_ok=True)

                # Label metadata and the size report are emitted only when a
                # segmentation was actually loaded for this case, so no stale
                # or undefined label image is ever referenced.
                if os.path.isfile(full_label_file):
                    sitk_lbl_original = util.load_nifti(full_label_file)
                    util.save_nifti(
                        sitk_lbl_original, processed_lbl_full_path, full_label_file)
                    print(f"Saved Segemention NIfTI file to {processed_lbl_full_path}")

                    label_path_dict = {'brain': processed_lbl_full_path}
                    meta.add_keyvalue('Task', TASK_VALUE)
                    meta.add_keyvalue('Label_path', {TASK_VALUE: label_path_dict})

                    print(sitk_img_original.GetSize(), sitk_lbl_original.GetSize())

        except Exception as e:
            # Per-case boundary: record the failure and carry on with the
            # remaining cases.
            print(e)
            failed_files.append(data_dir)
            print(f"Failed to load BRATS images from {data_dir}")
            continue

        meta.add_extra_keyvalue('Metadata', CIA_other_info)

        # Persist after every case so an interrupted run keeps its progress.
        existing_mappings[output_image_file] = meta.get_meta_data()
        with open(json_output_path, 'w', encoding='utf-8') as json_file:
            json.dump(existing_mappings, json_file, indent=4)

    with open(failed_files_path, "w", encoding='utf-8') as json_file:
        json.dump(failed_files, json_file)

    print(f"The list has been written to {failed_files_path}")
    print(f"Saved NIfTI mappings to {json_output_path}")
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the input and output locations, echo
    # them, then run the conversion.
    cli = argparse.ArgumentParser(
        description="Process DICOM files and save as NIfTI.",
    )
    cli.add_argument(
        "--target_path",
        type=str,
        default="/home/data/Github/data/data_gen_def/DATASETS/BRATS/BRATS2020/",
        help="Path to the target directory containing metadata files.",
    )
    cli.add_argument(
        "--output_dir",
        type=str,
        default="/home/data/Github/data/data_gen_def/DATASETS_processed/BRATS/BRATS2020",
        help="Directory to save the NIfTI files.",
    )
    options = cli.parse_args()
    print(options.target_path, options.output_dir)
    main(options.target_path, options.output_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|