Tianyinus's picture
init submit
edcf5ee verified
"""
CPIA_ROI_0_BreakHis.py ver 23.6.9
This code aims to split images of different zooming size into different folders,this code also puts different classes
into different folders
"""
import argparse
import os
from PIL import Image
from tqdm import tqdm
def get_args_parser():
    """
    Build the command-line parser of this BreakHis pre-processing script.

    The parser is created with ``add_help=False``, so it does not define a ``-h/--help`` option itself.

    Returns:
        argparse.ArgumentParser: parser with the string options ``--input_root`` and ``--output_root``,
        both defaulting to ``'..'``.
    """
    # The first positional argument of ArgumentParser is ``prog``, i.e. the name shown in usage/error messages.
    parser = argparse.ArgumentParser('CPIA dataset ROI part BreakHis dataset pre-processing', add_help=False)
    parser.add_argument('--input_root', default='..', type=str,
                        help='The root that contains the original images. Please make sure that there are no '
                             'unwanted images with the corresponding suffix under the same root')
    parser.add_argument('--output_root', default='..', type=str,
                        help='The root for the sorted output images. The directory is created if it does '
                             'not exist yet')
    return parser
def make_and_clear_path(file_pack_path):
    """
    Create the directory ``file_pack_path`` (including missing parents) when nothing exists at that path.

    Despite the name, nothing is cleared: an already existing path is left untouched.
    """
    already_present = os.path.exists(file_pack_path)
    if already_present:
        return
    os.makedirs(file_pack_path)
def find_all_files(root, suffix=None):
    """
    Walk ``root`` recursively and return the paths of the files found.

    When ``suffix`` is given, only files whose name ends with it are kept.
    The collected list and its length are printed before the list is returned.
    """
    matched = [
        os.path.join(folder, filename)
        for folder, _, filenames in os.walk(root)
        for filename in filenames
        if suffix is None or filename.endswith(suffix)
    ]
    print(matched)
    print(len(matched))
    return matched
def save_file(f_image, save_dir, suffix='.jpg'):
    """
    Save ``f_image`` to ``save_dir + suffix``, creating the parent directory first when needed.

    Args:
        f_image: object exposing ``save(path)``.
        save_dir: destination path WITHOUT the file extension (despite the name, this is not a directory).
        suffix: extension appended to ``save_dir`` to form the final file path.
    """
    parent_dir = os.path.dirname(save_dir)
    # A bare file name has an empty parent and os.makedirs('') raises, so only create a real directory.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    f_image.save(save_dir + suffix)
# Magnification tokens that get a folder of their own; every other token is stored under '400'.
_KNOWN_MAGNIFICATIONS = ('40', '100', '200')

# Class abbreviation found in the file name -> name of the output sub-folder.
# Abbreviations that are not listed here fall back to 'tubular_adenoma'.
_CLASS_FOLDERS = {
    'DC': 'ductal_carcinoma',
    'LC': 'lobular_carcinoma',
    'MC': 'mucinous_carcinoma',
    'PC': 'papillary_carcinoma',
    'A': 'adenosis',
    'F': 'fibroadenoma',
    'PT': 'phyllodes_tumor',
}


def pc_to_stander(root_from, root_to):
    """
    Sort every ``.png`` file found under ``root_from`` into ``root_to/<magnification>/<class>/``.

    Both folder levels are derived from the file name: the magnification is the second-to-last
    ``-``-separated token, and the class abbreviation is the part of the third ``_``-separated token
    that precedes the first ``-``. For a name such as ``SOB_B_TA-14-4659-40-001.png`` this yields
    ``'40'`` and ``'TA'``. Each image is written through ``save_file`` with its default ``.jpg`` suffix,
    keeping the part of the file name before the first ``.``.

    Args:
        root_from: directory that is searched recursively for ``.png`` files.
        root_to: output root; created if it does not exist.

    Raises:
        IndexError: if a file name does not contain the separators described above.
    """
    make_and_clear_path(root_to)
    f_dir_list = find_all_files(root=root_from, suffix=".png")

    for f_dir in tqdm(f_dir_list):
        file_name = os.path.basename(f_dir)
        magnification = file_name.split("-")[-2]
        class_code = (file_name.split("_")[2]).split("-")[0]
        stem = file_name.split(".")[0]

        magnification_folder = magnification if magnification in _KNOWN_MAGNIFICATIONS else '400'
        class_folder = _CLASS_FOLDERS.get(class_code, 'tubular_adenoma')
        save_dir = os.path.join(root_to, magnification_folder, class_folder, stem)

        # Open inside a context manager so the file handle is released after each image.
        with Image.open(f_dir) as f_img:
            save_file(f_img, save_dir)
if __name__ == '__main__':
    # Read the two roots from the command line, then sort the input images into the output root.
    cli_args = get_args_parser().parse_args()
    pc_to_stander(cli_args.input_root, cli_args.output_root)