|
|
| '''
|
| writebyygq
|
| createon2025-08-30
|
|
|
|
|
| BL = Baseline(基线)
|
| FU = Follow-up(随访)
|
|
|
| 1. Baseline (基线)
|
| 含义:指的是在疾病初期、治疗前或某个特定时间点第一次拍摄的影像(如CT、MRI、X光)。
|
| 作用:这份影像作为评估病情严重程度和后续变化的“起跑线”或“参照物”。医生通过将未来的影像与基线影像进行比较,来判断病情的变化。
|
| 2. Follow-up (随访)
|
| 含义:指的是在基线影像之后,按计划或根据病情需要再次拍摄的影像。
|
| 作用:用于评估治疗效果(如肿瘤是否缩小)、监测疾病进展(如病灶是否增大或增多)、或观察术后恢复情况。
|
| “BL FU” 在报告中的应用场景:
|
| 当放射科医生在报告中写下“BL FU”或“compare to BL FU”时,他们的意思是:
|
| “本次的影像检查结果,需要与之前拍摄的基线影像进行对比,以评估变化。”
|
|
|
| 例如:
|
| 肿瘤患者:一位肺癌患者在化疗前做了一次CT(作为基线BL),化疗2个周期后又做了一次CT(作为随访FU)。放射科医生会在新报告中将两次影像进行对比,并描述:“与20XX年X月X日的基线CT(BL FU) 相比,右肺下叶肿块明显缩小。”
|
| 慢性病患者:如肺炎、肝硬化、多发性硬化等需要长期监测的疾病,医生都会通过对比基线片和随访片来精确判断病情是好转、稳定还是恶化。
|
|
|
| label:
|
| 0:backgroud 1-N: tumor,其中具体多少数值需要读取对应json文件信息
|
|
|
编号ID:10位的16进制编号,每一个对应一个csv文件,对应一个或多个BL和FU。每个对应相应的json文件和mask标签文件。
|
| 备注:CSV包含所有的label信息和编号,如果考虑按照tissue进行分别存储,可以考虑对mask文件结合csv/json信息进行提取相同的lesion_type分别存储label_dict
|
| BL的以及对应的MASK都是inputsTr目录下面
|
| 命名形式:
|
| 93dd4de5cd_BL_img_BL_img_00.nii.gz
|
| 93dd4de5cd_BL_mask_BL_img_00.nii.gz
|
| 93dd4de5cd_BL_00.json
|
|
|
FU在inputsTr目录下面,对应的mask在targetsTr里面
|
| 命名形式:
|
| c6f057b865_FU_img_FU_img_00.nii.gz
|
| c6f057b865_FU_mask_FU_img_00.nii.gz
|
| c6f057b865_FU_img_FU_img_01.nii.gz
|
| c6f057b865_FU_mask_FU_img_01.nii.gz
|
| c6f057b865_FU_00.json
|
| c6f057b865_FU_01.json
|
|
|
|
|
| 元数据信息CSV-病灶或者癌症信息--对应基线的位置,对应的基线影像编号,位置,以及对应的随访位置编号以及病灶位置
|
| lesion_id,cog_bl,img_id_bl,cog_propagated,cog_fu,img_id_fu,lesion_type
|
| 1,84.9530896759608 273.525433308214 148.780708364732,00,108.78432777048911 320.7355032513338 543.6178096475021,116.270833333333 317.46130952381 548.446428571429,00,Lung
|
| 2,206.307026476578 258.39816700611 177.256619144603,00,202.79674663210054 297.81536880017677 566.3173808142716,197.325938566553 300.598976109215 565.804607508532,00,Lymph node
|
|
|
| json格式样例
|
| {
|
| "name": "Points of interest",
|
| "points": [
|
| {
|
| "name": "1",
|
| "point": [
|
| 84.9530896759608,
|
| 273.525433308214,
|
| 148.780708364732
|
| ]
|
| },
|
| {
|
| "name": "2",
|
| "point": [
|
| 206.307026476578,
|
| 258.39816700611,
|
| 177.256619144603
|
| ]
|
| }
|
| ],
|
| "type": "Multiple points",
|
| "version": {
|
| "major": 1,
|
| "minor": 0
|
| }
|
| }
|
|
|
| 20251101补充增加,将病灶编号进行合并同类项目,
|
| 注意处理完成后保留原影像的几何空间信息以及元数据文件信息
|
|
|
|
|
| '''
|
| import os
|
| import glob
|
| import pandas as pd
|
| import SimpleITK as sitk
|
| import argparse
|
| import json
|
| from tqdm import tqdm
|
| from util import meta_data
|
| import util
|
| import numpy as np
|
|
|
|
|
| import shutil
|
|
|
|
|
|
|
| label_id_lut={'backgroud': 0,
|
| 'Lymph node': 1,
|
| 'Lung': 2,
|
| 'Soft tissue / Skin': 3,
|
| 'Liver': 4,
|
| 'Skeleton': 5,
|
| 'Adrenals': 6,
|
| 'Spleen': 7,
|
| 'CNS': 8,
|
| 'Kidney': 9,
|
| 'Heart': 10,
|
| 'Others': 11,
|
| 'unclear': 12,
|
| }
|
|
|
|
|
| TASK_VALUE="segmentation"
|
| CLAMP_RANGE_CT = [-300,300]
|
| CLAMP_RANGE_MRI = None
|
| TARGET_VOXEL_SPACING=None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| LABEL_DICT={
|
| "0":"backgroud",
|
| }
|
| META_COLUMN=['lesion_id', 'cog_bl', 'img_id_bl', 'cog_propagated', 'cog_fu','img_id_fu', 'lesion_type']
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_metadata_files(path):
    """Return the paths of all CSV metadata files directly inside *path*.

    The search is non-recursive: only files matching ``path/*.csv`` are
    returned (subdirectories are not descended into).

    Fix: the original passed ``recursive=True`` to ``glob.glob``, but
    without a ``**`` component in the pattern that flag has no effect;
    it was dropped so the call no longer implies a recursive search.
    """
    search_pattern = os.path.join(path, '*.csv')
    return glob.glob(search_pattern)
|
|
|
def find_image_dirs(path):
    """List the entry names contained in *path*.

    Note: despite the name, every directory entry is returned — files
    included — exactly as ``os.listdir`` reports them (names only, in
    arbitrary order, not full paths).
    """
    entries = os.listdir(path)
    return entries
|
|
|
|
|
def load_dicom_images(folder_path):
    """Read a DICOM series from *folder_path*.

    Returns a pair ``(file_names, volume)``: the ordered slice file
    names reported by GDCM for the folder, and the 3-D volume assembled
    from those slices.
    """
    series_reader = sitk.ImageSeriesReader()
    file_names = series_reader.GetGDCMSeriesFileNames(folder_path)
    series_reader.SetFileNames(file_names)
    volume = series_reader.Execute()
    return file_names, volume
|
|
|
|
|
def load_dicom_tag(imgs):
    # Read a single image file (path given in `imgs`) via SimpleITK's
    # ImageFileReader.
    # NOTE(review): despite the name, this returns the result of
    # reader.Execute(), i.e. the full image object (pixel data
    # included), not a tag/metadata dictionary. ReadImageInformation()
    # loads only the header; tag values would normally be fetched with
    # reader.GetMetaData(key) afterwards — confirm what callers expect
    # before changing.
    reader = sitk.ImageFileReader()

    reader.SetFileName(imgs)
    reader.ReadImageInformation()

    tag=reader.Execute()
    return tag
|
|
|
def load_nrrd(fp):
    """Load a single image volume (e.g. NRRD/NIfTI) from the path *fp*."""
    volume = sitk.ReadImage(fp)
    return volume
|
|
|
|
|
def merge_images(series_files):
    """Assemble one volume from an explicit list of image files.

    Each case ships two CT-derived series (CT / PET, suffixed
    0000 / 0001); reading them through a single ImageSeriesReader stacks
    the modalities along an extra (fourth) dimension, in CT-then-PET
    order as given by *series_files*.
    """
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(series_files)
    merged = series_reader.Execute()
    return merged
|
|
|
def save_nifti(image, output_path, folder_path):
    """Write *image* to *output_path*, creating parent directories as needed.

    The originating folder is recorded on the image metadata under the
    key ``FolderPath`` before writing.

    Fixes over the original:
    - ``os.makedirs(..., exist_ok=True)`` removes the check-then-create
      race when several processes write into the same tree;
    - an empty dirname (``output_path`` is a bare filename) no longer
      triggers ``makedirs('')``, which raises FileNotFoundError.
    """
    output_dirpath = os.path.dirname(output_path)
    if output_dirpath and not os.path.isdir(output_dirpath):
        print(f"Creating directory {output_dirpath}")
        os.makedirs(output_dirpath, exist_ok=True)

    image.SetMetaData("FolderPath", folder_path)
    sitk.WriteImage(image, output_path)
|
|
|
|
|
def convert_windows_to_linux_path(windows_path):
    """Convert a Windows-style path into a POSIX-like path.

    Backslashes become forward slashes, and everything up to and
    including the first ':' (e.g. a 'C:' drive prefix) is dropped.
    Paths without a ':' are returned with only the separator change.
    """
    posix_like = windows_path.replace('\\', '/')
    if ':' not in posix_like:
        return posix_like
    _drive, remainder = posix_like.split(':', 1)
    return remainder
|
|
|
|
|
def get_filename_list(fp_dir):
    """Return every CSV file found directly inside *fp_dir*."""
    csv_pattern = f"{fp_dir}/*.csv"
    return glob.glob(csv_pattern)
|
|
|
def check_fname(fname):
    """Split a file stem into ``(subject_id, study_date)``.

    Stems starting with ``'fdg'`` carry a 14-character id followed by a
    10-character date at offset 15; every other stem carries a
    21-character id with the date starting at offset 22. In both layouts
    one separator character between id and date is skipped.
    """
    if fname.startswith("fdg"):
        return fname[:14], fname[15:25]
    return fname[:21], fname[22:]
|
def main(target_path, output_dir):
    """Convert the PSMA longitudinal dataset into per-case NIfTI outputs.

    For each metadata CSV under ``target_path/inputsTr`` and each
    BL/FU JSON it maps to, this loads the matching image, (pass-through)
    resamples it, remaps the lesion mask labels from per-lesion ids to
    per-tissue ids via ``label_id_lut``, saves image + label under
    ``output_dir``, and appends a metadata record to
    ``nifti_mappings.json``. Size-mismatched labels are collected in
    ``failed_files.json``.
    """

    # Kept for loop structure below; only 'inputsTr' is ever processed.
    pid_dirs=["inputsTr"]
    failed_files = []
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    json_output_path = os.path.join(output_dir, 'nifti_mappings.json')
    failed_files_path = os.path.join(output_dir, 'failed_files.json')
    # Accumulator object from util; re-used (not reset) across images.
    meta = meta_data()

    # Seed the mappings file with an empty object so the 'r+' update
    # below always has valid JSON to read.
    if not os.path.exists(json_output_path):
        with open(json_output_path, 'w') as json_file:
            json.dump({}, json_file)

    input_dir=os.path.join(target_path,'inputsTr')
    target_dir=os.path.join(target_path,'targetsTr')

    # One CSV per case id; the CSV stem doubles as the case prefix.
    fp_files=get_filename_list(input_dir)

    if pid_dirs:
        # NOTE(review): pid_dir itself is never used inside the loop.
        for pid_dir in tqdm(pid_dirs, desc="Processing all dataset"):
            for fp_file in tqdm(fp_files, desc="Processing all dataset"):
                meta_file=fp_file
                df_meta=pd.read_csv(meta_file)

                # Strip the '.csv' extension to get the case id prefix.
                fp_name=os.path.basename(fp_file)[:-4]

                # Process baseline (BL) and follow-up (FU) time points.
                for sub_mod in ['BL','FU']:

                    # e.g. <case>_BL_00.json, <case>_FU_01.json, ...
                    bl_fps=glob.glob("%s/%s_%s*.json"%(input_dir,fp_name,sub_mod))
                    if len(bl_fps)>0:
                        for bl_fp in bl_fps:
                            # Strip '.json' to get e.g. '<case>_BL_00'.
                            basename=os.path.basename(bl_fp)[:-5]
                            # Derive the image filename from the JSON name,
                            # e.g. '<case>_BL_00.json' -> '<case>_BL_img_BL_img_00.nii.gz'.
                            # NOTE(review): only '_BL_' is replaced — for
                            # sub_mod 'FU' the FU filenames are produced
                            # because they contain no '_BL_'; confirm the
                            # FU naming ('_FU_img_FU_img_') is matched by
                            # some other mechanism — TODO verify.
                            bl_fp_name=os.path.basename(bl_fp).replace("_BL_","_BL_img_BL_img_").replace(".json",".nii.gz")
                            bl_fp_img=os.path.join(input_dir,bl_fp_name)

                            if os.path.isfile(bl_fp_img):

                                bl_mask_name=os.path.basename(bl_fp).replace("_BL_","_BL_mask_BL_img_").replace(".json",".nii.gz")

                                # Masks live in inputsTr for BL; fall back
                                # to targetsTr (where FU masks live).
                                bl_fp_mask=os.path.join(input_dir,bl_mask_name)
                                if os.path.isfile(bl_fp_mask):
                                    label_fp=bl_fp_mask
                                    label_flag=True
                                else:
                                    bl_fp_mask=os.path.join(target_dir,bl_mask_name)
                                    if os.path.isfile(bl_fp_mask):
                                        label_fp=bl_fp_mask
                                        label_flag=True
                                    else:
                                        label_fp=None
                                        label_flag=False

                                modality="CT"
                                study='PSMA_Longitudinal_CT'
                                CIA_other_info = {
                                    'Image_id':basename,
                                    'metadata_file':''
                                }
                                CIA_other_info['split'] = "train"

                                CIA_other_info['metadata_file']=meta_file
                                stk_image=util.load_nifti(bl_fp_img)
                                spacing_info = stk_image.GetSpacing()
                                size = list(stk_image.GetSize())
                                # Resampler built with the image's OWN
                                # spacing/size — effectively identity
                                # unless util decides otherwise.
                                resampler =util.get_unisize_resampler(stk_image, interpolator='linear', spacing=spacing_info, size=size)
                                if resampler is not None:
                                    proces_image = resampler.Execute(stk_image)
                                    print('SPACIE INFO AFTER', proces_image.GetSpacing())
                                    CIA_other_info['Resample'] = True
                                else:
                                    proces_image = stk_image
                                    CIA_other_info['Resample'] = False

                                output_path = os.path.join(output_dir,fp_name, f"{basename}.nii.gz")

                                save_nifti(proces_image, output_path, input_dir)
                                print(f"Saved NIfTI file to {output_path}")

                                if label_flag:
                                    label_path_dict = {}
                                    label_stk_img=util.load_nifti(label_fp)

                                    image_array = sitk.GetArrayFromImage(label_stk_img)

                                    # NOTE(review): json_info is loaded but
                                    # never used afterwards — dead read?
                                    with open(bl_fp,'r') as fi:
                                        json_info=json.load(fi)

                                    # Per-image label dictionary; starts
                                    # with background and grows with each
                                    # tissue type found in the CSV.
                                    label_dict={
                                        "0":"backgroud"
                                    }

                                    update_image_array=np.copy(image_array)

                                    # Merge per-lesion ids into per-tissue
                                    # labels: all lesion_ids of one
                                    # lesion_type collapse to one value.
                                    group_meta=df_meta.groupby('lesion_type')['lesion_id']
                                    for name,group in group_meta:

                                        # NOTE(review): get_group(name) is
                                        # redundant — `group` already holds
                                        # the same Series.
                                        ids=group_meta.get_group(name)
                                        # Raises KeyError if the CSV has a
                                        # lesion_type missing from
                                        # label_id_lut — TODO confirm that
                                        # is the desired failure mode.
                                        target_id=label_id_lut[name]

                                        label_dict[str(target_id)]=name

                                        for v in ids.tolist():
                                            print(name,v,target_id)
                                            # Remap against the ORIGINAL
                                            # array so earlier rewrites
                                            # cannot be re-remapped.
                                            update_image_array[image_array==v]=target_id
                                    print(np.where(update_image_array==10))
                                    # Release the source array reference.
                                    image_array=None
                                    label_stk_img_update=sitk.GetImageFromArray(update_image_array)
                                    # Restore geometry (origin/spacing/
                                    # direction) from the source mask.
                                    label_stk_img_update.CopyInformation(label_stk_img)

                                    # Carry over all metadata key/values.
                                    meta_keys = label_stk_img.GetMetaDataKeys()
                                    for key in meta_keys:
                                        value = label_stk_img.GetMetaData(key)
                                        label_stk_img_update.SetMetaData(key, value)

                                    # Same geometry as the image resample,
                                    # but nearest-neighbour for labels.
                                    resampler =util.get_unisize_resampler(label_stk_img_update, interpolator='nearest', spacing=spacing_info, size=size)
                                    if resampler is not None:
                                        proces_label = resampler.Execute(label_stk_img_update)

                                        ary_process_label=sitk.GetArrayFromImage(proces_label)

                                        # Heuristic: if the last slice is a
                                        # uniform non-zero plane (resampling
                                        # padding artifact), blank it out.
                                        if ary_process_label[-1,:,:].mean()==ary_process_label[-1,0,0] and ary_process_label[-1,0,0]>0:
                                            print('momingqimiao',ary_process_label[-1,0,0])
                                            ary_process_label[-1,:,:]=0

                                        label_stk_img_process=sitk.GetImageFromArray(ary_process_label)
                                        label_stk_img_process.CopyInformation(proces_label)
                                        meta_keys = proces_label.GetMetaDataKeys()
                                        for key in meta_keys:
                                            value = proces_label.GetMetaData(key)
                                            label_stk_img_process.SetMetaData(key, value)

                                    else:
                                        label_stk_img_process = label_stk_img_update

                                    # Image/label size mismatch: record the
                                    # mask path and skip this image.
                                    try:
                                        assert proces_image.GetSize() == label_stk_img_process.GetSize()
                                    except Exception as e:
                                        failed_files.append(label_fp)
                                        continue

                                    label_output_path = os.path.join(output_dir, fp_name, TASK_VALUE, f"{basename}.nii.gz")

                                    label_path_dict['tumor'] = label_output_path
                                    util.save_nifti(label_stk_img_process, label_output_path, label_fp)
                                    print(f"Saved Label Segment NIfTI file to {label_output_path}")

                                else:
                                    # No mask found: skip metadata/JSON
                                    # bookkeeping for this image entirely.
                                    continue

                                size_processed = list(proces_image.GetSize())
                                print('size_processed',size_processed,size)

                                meta.add_keyvalue('Spacing_mm',min(spacing_info[:3]))
                                meta.add_keyvalue('OriImg_path',bl_fp_img)
                                meta.add_keyvalue('Size',size_processed)
                                meta.add_keyvalue('Modality',modality)
                                meta.add_keyvalue('Dataset_name',study)
                                meta.add_keyvalue('ROI','whole-body')

                                # label_flag is always True here (the False
                                # branch `continue`d above), so these
                                # segmentation keys are always recorded.
                                if label_flag:

                                    meta.add_keyvalue('Task',TASK_VALUE)

                                    meta.add_keyvalue('Label_path',{TASK_VALUE:label_path_dict})

                                    meta.add_keyvalue('Label_Dict',label_dict)

                                meta.add_extra_keyvalue('Metadata',CIA_other_info)

                                # Read-modify-write the mappings JSON so
                                # progress survives a crash mid-run.
                                with open(json_output_path, 'r+') as json_file:
                                    existing_mappings = json.load(json_file)
                                    existing_mappings[output_path] = meta.get_meta_data()
                                    json_file.seek(0)

                                    json.dump(existing_mappings, json_file, indent=4)
                                    json_file.truncate()

    # Persist the list of masks whose size did not match their image.
    with open(failed_files_path, "w") as json_file:
        json.dump(failed_files, json_file)

    print(f"The list has been written to {failed_files_path}")
    print(f"Saved NIfTI mappings to {json_output_path}")
|
|
|
if __name__ == "__main__":
    # CLI entry point: parse source/destination directories and run the
    # conversion pipeline.
    parser = argparse.ArgumentParser(description="Process DICOM files and save as NIfTI.")
    parser.add_argument("--target_path", type=str,
                        help="Path to the target directory containing metadata files.",
                        default="/home/data/ygq/Data_Engineering/PSMA_clean/demo")
    # Fix: the default previously began with an accidental double slash
    # ("//home/..."); a leading "//" is implementation-defined on POSIX.
    parser.add_argument("--output_dir", type=str,
                        help="Directory to save the NIfTI files.",
                        default="/home/data/ygq/Data_Engineering/PSMA_clean/sample/")
    args = parser.parse_args()
    print(args.target_path, args.output_dir)
    main(args.target_path, args.output_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|