CellPilot / preprocessing_scripts /preprocess_camelyon.py
philippendres's picture
Upload folder using huggingface_hub
907462b verified
Raw
History Blame Contribute Delete
2.38 kB
import os
import slideio
import numpy as np
from PIL import Image
from tqdm import tqdm
# Tile every mask slide (and the matching image slide) at a pyramid of scales
# and save only the tiles whose mask contains at least one pixel with label 2.
#
# At level r a tile covers up to (2**r * TILE_SIZE) x (2**r * TILE_SIZE) source
# pixels and is read downsampled by 2**r, so a full tile is TILE_SIZE x
# TILE_SIZE on disk. Tiles on the last column/row cover only the remaining
# source pixels and are correspondingly smaller.
#
# Output files are written to "images_patches/" and "masks_patches/" as
#   <stem>_<x>_<y>_<width>_<height>_<out_width>_<out_height>.png
# where (x, y, width, height) is the source rectangle handed to read_block and
# (out_width, out_height) is the requested size after downsampling.
data_directory = "/vol/data/histo_datasets/CAMELYON/CAMELYON17/"
TILE_SIZE = 1024  # edge length of a full tile at level 0, in source pixels

mask_dir = os.path.join(data_directory, "masks")
image_dir = os.path.join(data_directory, "images")
image_patch_dir = os.path.join(data_directory, "images_patches")
mask_patch_dir = os.path.join(data_directory, "masks_patches")

# Image.save does not create missing parent directories, so make sure the
# output folders exist before the first tile is written.
os.makedirs(image_patch_dir, exist_ok=True)
os.makedirs(mask_patch_dir, exist_ok=True)

mask_list = os.listdir(mask_dir)
for m in tqdm(mask_list):
    # Drop the last 9 characters of the mask file name to get the slide stem;
    # presumably this strips a suffix such as "_mask.tif" -- TODO confirm
    # against the actual file naming in the masks directory.
    stem = m[:-9]

    slide = slideio.open_slide(os.path.join(mask_dir, m))
    image_slide = slideio.open_slide(os.path.join(image_dir, stem + ".tif"))
    scene = slide.get_scene(0)
    image_scene = image_slide.get_scene(0)

    # The tiling grid is derived from the mask scene only; the image scene is
    # read with the same rectangles (assumes both scenes share dimensions --
    # TODO confirm).
    size0 = int(scene.size[0])
    size1 = int(scene.size[1])

    # Number of doublings needed until one tile spans the longer side.
    dim0 = int(np.ceil(size0 / TILE_SIZE))
    dim1 = int(np.ceil(size1 / TILE_SIZE))
    resolutions = int(np.ceil(np.log2(max(dim0, dim1))))

    for r in range(resolutions + 1):
        scale = 2 ** r           # downsample factor at this level
        res = scale * TILE_SIZE  # source-pixel extent of a full tile
        dim0 = int(np.ceil(size0 / res))
        dim1 = int(np.ceil(size1 / res))

        for i in range(dim0):
            x = i * res
            # The last column only covers what is left of the slide.
            width = min(res, size0 - x)
            # Clamp to 1: a border tile narrower than `scale` would otherwise
            # request a zero-sized output (slideio appears to treat 0 as
            # "no scaling / auto" -- TODO confirm), and the file name would
            # carry a 0 dimension.
            out_width = max(1, width // scale)

            for j in range(dim1):
                y = j * res
                height = min(res, size1 - y)
                out_height = max(1, height // scale)

                rect = (x, y, width, height)
                out_size = (out_width, out_height)

                # Binarise: pixels equal to 2 become 1, everything else 0
                # (2 is presumably the label of interest -- TODO confirm).
                mask = scene.read_block(rect, out_size)
                mask = np.where(mask == 2, 1, 0).astype(np.uint8)
                if not mask.any():
                    # No labelled pixel in this tile: skip reading the image.
                    continue

                image = image_scene.read_block(rect, out_size)

                # Image and mask tiles share one file name so they can be
                # paired by name across the two output folders.
                patch_name = "{}_{}_{}_{}_{}_{}_{}.png".format(
                    stem, x, y, width, height, out_width, out_height
                )
                Image.fromarray(image).save(os.path.join(image_patch_dir, patch_name))
                Image.fromarray(mask).save(os.path.join(mask_patch_dir, patch_name))