philippendres's picture
Upload folder using huggingface_hub
907462b verified
Raw
History Blame Contribute Delete
2.8 kB
# -*- coding: utf-8 -*-
"""
ICIAR2018 - Grand Challenge on Breast Cancer Histology Images
https://iciar2018-challenge.grand-challenge.org/home/
"""
import xml.etree.ElementTree as ET
import numpy as np
from imageio import imwrite
import cv2
import os
import openslide
def findExtension(directory, extension='.xml'):
    """Return the sorted names of the entries in *directory* ending in *extension*.

    Args:
        directory: Path of the folder to list (passed to ``os.listdir``).
        extension: Suffix an entry name must end with to be kept. The
            comparison is case-sensitive.

    Returns:
        A list of bare entry names (not full paths), sorted ascending.
    """
    return sorted(
        name for name in os.listdir(directory) if name.endswith(extension)
    )
def fillImage(image, coordinates, color=255):
    """Fill the polygons in *coordinates* on *image* and return *image*.

    Args:
        image: Array handed to ``cv2.fillPoly`` as the drawing target.
        coordinates: Polygons to fill, forwarded unchanged to
            ``cv2.fillPoly``.
        color: Fill value forwarded as the ``color`` keyword.

    Returns:
        The same ``image`` object that was passed in; the return value of
        ``cv2.fillPoly`` itself is ignored.
    """
    canvas = image
    cv2.fillPoly(canvas, coordinates, color=color)
    return canvas
def readXML(filename):
    """Parse a region-annotation XML file into parallel per-region lists.

    Regions are the ``Region`` children of ``root[0][1]``. For every region
    the ``AreaMicrons`` and ``LengthMicrons`` attributes are read as floats,
    the vertices are taken from the region's second child, and a label is
    derived from the ``Value`` of the region's first nested attribute, or
    from the region's own ``Text`` attribute when that is unavailable.

    Label text is matched case-insensitively: text containing ``benign``
    maps to 1, ``in situ`` to 2 and ``invasive`` to 3. A label matching none
    of these (or a missing label) is stored unchanged.

    Args:
        filename: Path or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(coords, labels, length, area, pixel_spacing)`` where
        ``coords`` holds one list of ``[x, y]`` integer pairs per region,
        ``labels``, ``length`` and ``area`` hold one entry per region, and
        ``pixel_spacing`` is the root's ``MicronsPerPixel`` as a float.
    """
    root = ET.parse(filename).getroot()
    regions = root[0][1].findall('Region')
    pixel_spacing = float(root.get('MicronsPerPixel'))

    labels = []
    coords = []
    length = []
    area = []
    for r in regions:
        area.append(float(r.get('AreaMicrons')))
        length.append(float(r.get('LengthMicrons')))

        # Only a missing child is an expected failure here; anything else
        # should propagate instead of being silently swallowed.
        try:
            label = r[0][0].get('Value')
        except IndexError:
            label = None
        # Also fall back when the attribute exists but carries no 'Value',
        # which previously crashed on ``None.lower()``.
        if label is None:
            label = r.get('Text')

        text = label.lower() if label is not None else ''
        if 'benign' in text:
            label = 1
        elif 'in situ' in text:
            label = 2
        elif 'invasive' in text:
            label = 3
        labels.append(label)

        # The region's second child holds the vertex elements.
        coords.append([[int(v.get('X')), int(v.get('Y'))] for v in r[1]])
    return coords, labels, length, area, pixel_spacing
def saveImage(filename, image_size, coordinates, labels, sample=1):
    """Rasterise labelled polygons into a uint8 mask and store it with ``np.save``.

    Args:
        filename: Target handed to ``np.save`` (NumPy appends ``.npy`` to a
            path that lacks it).
        image_size: Shape of the full-resolution mask to allocate.
        coordinates: One sequence of ``[x, y]`` vertices per polygon.
        labels: One integer label per polygon, used as an index into the
            pixel-value table (1 'benign', 2 'in situ', 3 'invasive').
        sample: Stride applied to both axes before saving; 1 keeps every
            pixel.

    With no polygons an all-zero mask is saved.
    """
    # Pixel value written for each label index; 0 is the untouched background.
    colors = [0, 1, 2, 3]
    img = np.zeros(image_size, dtype=np.uint8)
    for c, l in zip(coordinates, labels):
        # fillImage hands back the array it was given, so polygons accumulate
        # on the one canvas; rebinding `img` keeps it defined even when the
        # loop body never runs.
        img = fillImage(img, [np.int32(np.stack(c))], color=colors[l])
    np.save(filename, img[::sample, ::sample])
if __name__=='__main__':
    # Script entry point: for every annotation XML in the dataset folder,
    # read the matching .svs slide's dimensions, parse the annotation and
    # save a label mask next to it.
    folder_name = '/home/ubuntu/thesis/data/ICIA2018/ICIAR2018_BACH_Challenge/WSI/' #path to the dataset folder
    files = findExtension(folder_name)
    # Parsed annotation data of every processed scan, kept for inspection.
    store = []
    for file in files:
        file_name = file[:-4]  # drop the 4-character '.xml' suffix
        print('Reading scan',file_name)
        scan = openslide.OpenSlide(folder_name+file_name+'.svs')
        try:
            dims = scan.dimensions
        finally:
            # Only the dimensions are needed; release the slide handle right
            # away instead of leaking one per scan.
            scan.close()
        # Mask shape is the slide dimensions with the two entries swapped.
        img_size = (dims[1],dims[0])
        print('Generating thumbnail')
        coords,labels,length,area,pixel_spacing = readXML(folder_name+file)
        store += [[coords,labels,length,area,pixel_spacing]]
        saveImage(folder_name+file_name,img_size,coords,labels)