{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Create dataset\n", "\n", "This study is based on several datasets, so creating the dataset involves multiple steps. Each step is explained below with an example." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\zy7\\.conda\\envs\\torch2\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import sys\n", "sys.path.append('./dataprocesser')\n", "sys.path.append('./synthrad_conversion')\n", "from dataprocesser import step2_create_segmentation as createseg\n", "from dataprocesser import step1_init_data_list as init_data_list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: load the customised dataset\n", "\n", "First we need a function that loads our customised dataset.\n", "To keep things organised, the data is loaded as pairs of image path + patient ID (pID).\n", "Here is an example for the SynthRAD dataset." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we try another dataset, namely LITS." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 1 files in E:\\Projects\\yang_proj\\data\\LITS \n", "\n", "Found 1 files in E:\\Projects\\yang_proj\\data\\LITS \n", "\n", "E:\\Projects\\yang_proj\\data\\LITS\\volume-0.nii volume-0 \n", "\n" ] } ], "source": [ "import os\n", "from dataprocesser.dataset_LITS import list_img_pID_from_LITS_folder\n", "\n", "dir = r'E:\\Projects\\yang_proj\\data\\LITS'\n", "LITS_images, LITS_patient_IDs = list_img_pID_from_LITS_folder(dir, isseg=False)\n", "LITS_segs, _ = list_img_pID_from_LITS_folder(dir, isseg=True)\n", "for image, pID in zip(LITS_images, LITS_patient_IDs):\n", " print(image, 
pID, '\\n')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# step 2: create segmentations with the data list using totalsegmentator\n", "\n", "here we use the new lits dataset as example" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "use GPU: NVIDIA RTX A6000\n", "create segmentation mask for E:\\Projects\\yang_proj\\data\\LITS\\volume-0.nii\n", "\n", "If you use this tool please cite: https://pubs.rsna.org/doi/10.1148/ryai.230024\n", "\n", "Resampling...\n", " Resampled in 4.00s\n", "Predicting part 1 of 5 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\zy7\\.conda\\envs\\torch2\\Lib\\site-packages\\nnunetv2\\inference\\predict_from_raw_data.py:84: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n", " checkpoint = torch.load(join(model_training_output_dir, f'fold_{f}', checkpoint_name),\n", "c:\\Users\\zy7\\.conda\\envs\\torch2\\Lib\\site-packages\\threadpoolctl.py:1214: RuntimeWarning: \n", "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n", "the same time. 
Both libraries are known to be incompatible and this\n", "can cause random crashes or deadlocks on Linux when loaded in the\n", "same Python program.\n", "Using threadpoolctl may cause crashes or deadlocks. For more\n", "information and possible workarounds, please see\n", " https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n", "\n", " warnings.warn(msg, RuntimeWarning)\n", "100%|██████████| 27/27 [00:01<00:00, 17.86it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting part 2 of 5 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\zy7\\.conda\\envs\\torch2\\Lib\\site-packages\\nnunetv2\\inference\\predict_from_raw_data.py:84: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", " checkpoint = torch.load(join(model_training_output_dir, f'fold_{f}', checkpoint_name),\n", "100%|██████████| 27/27 [00:00<00:00, 40.81it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting part 3 of 5 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 27/27 [00:00<00:00, 45.97it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting part 4 of 5 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 27/27 [00:00<00:00, 38.12it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Predicting part 5 of 5 ...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 27/27 [00:00<00:00, 46.31it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Predicted in 234.94s\n", "Resampling...\n", "Saving segmentations...\n", " Saved in 0.38s\n", "segmentation mask is saved as E:\\Projects\\yang_proj\\data\\LITS\\volume-0_seg.nii\n" ] } ], "source": [ "from dataprocesser import step2_create_segmentation as createseg\n", "import torch\n", "import os\n", "\n", "# Physical GPU to expose to this kernel. CUDA_VISIBLE_DEVICES only takes effect when it is set\n", "# before CUDA is first initialised; the exposed GPU is then addressed simply as 'cuda'.\n", "gpu_index = 1\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = f'{gpu_index}'\n", "\n", "task = 'total'\n", "device = 'cuda'\n", "\n", "# get_device_name() raises an opaque error when no CUDA device is visible, so fail early with a clear message\n", "if not torch.cuda.is_available():\n", "    raise RuntimeError(f'no CUDA device visible with CUDA_VISIBLE_DEVICES={gpu_index}; adjust gpu_index')\n", "print('use GPU: ', torch.cuda.get_device_name('cuda'))\n", "\n", "# Runs the segmentation for every image collected in the LITS listing cell\n", "createseg.run(target_file_list=LITS_images, dataset=None, device=device, task=task)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This produces the segmentation we need, saved next to the input image with a `_seg` suffix (e.g. `volume-0_seg.nii` for `volume-0.nii` above).\n", "\n", "## step 3: create csv file\n", "\n", "We write the created data list to a csv file so that it can be modified and reused later.\n", "\n", "The resulting example.csv should have the following content:\n", "\n", 
"id,Aorta_diss,seg,img\n", "\n", "1PA001,0,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA001\\ct_seg.nii.gz,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA001\\ct.nii.gz\n", "\n", "1PA004,0,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA004\\ct_seg.nii.gz,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA004\\ct.nii.gz\n", "\n", "1PA005,0,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA005\\ct_seg.nii.gz,E:\\Projects\\yang_proj\\data\\synthrad\\Task1\\pelvis\\1PA005\\ct.nii.gz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# create and save the new LITS dataset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['E:\\\\Projects\\\\yang_proj\\\\data\\\\LITS\\\\volume-0.nii'] \n", " ['E:\\\\Projects\\\\yang_proj\\\\data\\\\LITS\\\\volume-0_seg.nii'] \n", " ['volume-0']\n" ] } ], "source": [ "from dataprocesser import create_csv \n", "output_csv_file = r'tutorial_lits.csv'\n", "print(LITS_images,'\\n',LITS_segs, '\\n',LITS_patient_IDs)\n", "LITS_Aorta_diss = [0] * len(LITS_images)\n", "datalist_LITS = [[id,Aorta_diss,seg,image] for id,Aorta_diss,seg,image in zip(LITS_patient_IDs, LITS_Aorta_diss, LITS_segs, LITS_images)]\n", "create_csv.create_csv_info_file(datalist_LITS, output_csv_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# start preparation for training or inference" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## create a monai/torch dataloader\n", "\n", "this dataloader uses add_CreateContour_MergeMask_MaskHUAssign_transforms, which assigns values to segmentation classes" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Infer_ddpm2d_seg2med\n", "create combined segmentation dataset with assigned value\n", "create base dataset\n", "combined segmentation assigned dataset use keys: 
['source', 'target', 'mask']\n", "scale2000 normalization\n", "scale2000 normalization\n", "use train csv: tutorial.csv\n", "use test csv: tutorial.csv\n", "use keys for creating volume dataset: ['source', 'target', 'mask']\n", "model name: ddpm2d_seg2med\n", "val_use_patch: False\n", "original image shape: torch.Size([1, 1, 512, 512, 75])\n" ] } ], "source": [ "from synthrad_conversion.utils.my_configs_yacs import init_cfg\n", "from dataprocesser.step1_init_data_list import init_dataset\n", "config_path = 'tutorial_config.yaml'\n", "opt=init_cfg(config_path)\n", "model_name_path='Infer_'+opt.model_name + opt.name_prefix\n", "print(model_name_path)\n", "dataset_name = 'combined_simplified_csv_seg_assigned'\n", "loader, opt, my_paths = init_dataset(opt, model_name_path, dataset_name)\n", "train_loader = loader.train_loader\n", "val_loader = loader.val_loader\n", "first_batch = next(iter(val_loader))\n", "first_input = first_batch[opt.dataset.indicator_A]\n", "first_target = first_batch[opt.dataset.indicator_B]\n", "print(\"original image shape:\", first_target.shape)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# visualize a slice:\n", "import matplotlib.pyplot as plt\n", "\n", "def plottorchdata(data, i, slice_id):\n", "    \"\"\"Save one slice of a tensor as the grayscale image tutorial_{i}.png.\n", "\n", "    data: torch tensor; singleton axes are squeezed away and the last remaining axis is indexed.\n", "    i: matplotlib figure number, also used in the output file name.\n", "    slice_id: index along the last axis of the squeezed array.\n", "    \"\"\"\n", "    # detach() lets tensors that require grad be converted; one squeeze() already drops every singleton axis\n", "    volume = data.detach().cpu().numpy().squeeze()\n", "    dataimg = f'tutorial_{i}.png'\n", "    plt.figure(i)\n", "    plt.imshow(volume[..., slice_id], cmap='gray')\n", "    plt.savefig(dataimg)\n", "\n", "# Clamp to the last slice so the cell also works for volumes with fewer than 51 slices\n", "slice_id = min(50, first_target.shape[-1] - 1)\n", "plottorchdata(first_input, 0, slice_id)\n", "plottorchdata(first_target, 1, slice_id)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# run inference!" 
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "use GPU: NVIDIA RTX A6000\n" ] } ], "source": [ "import torch\n", "\n", "# Prefer the second GPU, fall back to cuda:0 when only one GPU is visible (e.g. CUDA_VISIBLE_DEVICES\n", "# was restricted earlier in this kernel), and to the CPU when no GPU is available.\n", "if torch.cuda.is_available():\n", "    device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')\n", "    print('use GPU: ', torch.cuda.get_device_name(device))\n", "else:\n", "    # get_device_name() only accepts CUDA devices, so it must not be called on the CPU fallback\n", "    device = torch.device('cpu')\n", "    print('no GPU available, use CPU')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "all available models:\n", "ddpm2d_seg2med\n", "spade_ddpm2d_seg2med\n", "pix2pix\n", "cycle_gan\n", "AttentionUnet\n", "resUnet\n", "use model: ddpm2d_seg2med\n", "use pretrained model: logs\\241118ddpm_512.pt\n", "continue from epoch 171\n", "continue from step 1783714\n", "aorta dissection not manually set, use the value in csv file\n", "original image shape: torch.Size([1, 1, 512, 512, 75])\n", "\n", " inference val set from 0 to 0 batch\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/1 [00:00=3:\n", " patient_ID = parts[0]\n", " sign_name = parts[-2]\n", " slice_ID = parts[-1]\n", "\n", " if sign_name == 'seg':\n", " new_filename = f'{patient_ID}_seg_{slice_ID}'\n", " elif sign_name == 'input':\n", " new_filename = f'{patient_ID}_seg_{slice_ID}'\n", " elif sign_name == 'target':\n", " new_filename = f'{patient_ID}_target_{slice_ID}'\n", " else: # for synthesized and mask\n", " new_filename = f'{patient_ID}_{sign_name}_{slice_ID}'\n", " if not new_filename.endswith('.nii.gz'):\n", " new_filename += '.nii.gz'\n", " new_file_path = os.path.join(folder_path, new_filename)\n", " os.rename(old_file_path, new_file_path)\n", " print(f'Renamed {old_file_path} to {new_file_path}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from seg2med_evaluation import stack_volume_synthrad_anish \n", "import importlib\n", "importlib.reload(stack_volume_synthrad_anish) # force-reload the module so edits to it are picked up\n", "root_path = r'logs'\n", "folder_list = 
[\n", " '20250403_0359_Infer_ddpm2d_seg2med',\n", " ]\n", "for folder in folder_list:\n", " folder_path = os.path.join(root_path, folder, 'saved_outputs')\n", " # rename(folder_path)\n", " stack_volume_synthrad_anish.stack_volume(folder_path)" ] } ], "metadata": { "kernelspec": { "display_name": "torch", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }