Spaces:
No application file
No application file
| # -*- coding: utf-8 -*- | |
| """ | |
| ICIAR2018 - Grand Challenge on Breast Cancer Histology Images | |
| https://iciar2018-challenge.grand-challenge.org/home/ | |
| """ | |
| import xml.etree.ElementTree as ET | |
| import numpy as np | |
| from imageio import imwrite | |
| import cv2 | |
| import os | |
| import openslide | |
def findExtension(directory, extension='.xml'):
    """Return the sorted names of entries in ``directory`` ending with ``extension``.

    Args:
        directory: Path of the folder to list.
        extension: Suffix an entry name must end with to be kept
            (defaults to ``'.xml'``).

    Returns:
        A list of bare entry names (not joined with ``directory``), sorted
        in ascending order.
    """
    matching = [name for name in os.listdir(directory) if name.endswith(extension)]
    return sorted(matching)
def fillImage(image, coordinates, color=255):
    """Fill the polygons in ``coordinates`` on ``image`` and return ``image``.

    The drawing is delegated to ``cv2.fillPoly``; its return value is
    ignored, so this relies on ``cv2.fillPoly`` painting into ``image``
    in place.

    Args:
        image: Array to draw on.
        coordinates: Polygons to fill, passed straight to ``cv2.fillPoly``.
        color: Fill value (defaults to 255).

    Returns:
        The same ``image`` object that was passed in.
    """
    cv2.fillPoly(image, coordinates, color)
    return image
def readXML(filename):
    """Parse an annotation XML file and return the regions it describes.

    Only the ``Region`` elements found under ``root[0][1]`` (the second child
    of the first child of the document root) are read.

    Each region's label text is taken from the ``Value`` of its first nested
    attribute element (``region[0][0]``); when that element is missing or its
    value is empty, the region's own ``Text`` attribute is used instead. The
    text is then matched case-insensitively, in this order:

    * contains ``'benign'``   -> 1
    * contains ``'in situ'``  -> 2
    * contains ``'invasive'`` -> 3

    A label that matches none of these is returned unchanged as a string, so
    callers that need integer classes must check for it.

    Args:
        filename: Path (or file object) accepted by ``ET.parse``.

    Returns:
        A tuple ``(coords, labels, length, area, pixel_spacing)`` where

        * ``coords`` is a list with one ``[[x, y], ...]`` vertex list per region,
        * ``labels`` holds one class index (or unrecognised label string) per region,
        * ``length`` holds each region's ``LengthMicrons`` as float,
        * ``area`` holds each region's ``AreaMicrons`` as float,
        * ``pixel_spacing`` is the root's ``MicronsPerPixel`` as float.

    Raises:
        ValueError: If a region carries neither an attribute ``Value`` nor a
            ``Text`` label.
    """
    # Keyword looked for in the lower-cased label -> class index.
    # The order matters: the first keyword found wins.
    label_codes = (('benign', 1), ('in situ', 2), ('invasive', 3))

    root = ET.parse(filename).getroot()
    regions = root[0][1].findall('Region')
    pixel_spacing = float(root.get('MicronsPerPixel'))

    labels = []
    coords = []
    length = []
    area = []
    for r in regions:
        area.append(float(r.get('AreaMicrons')))
        length.append(float(r.get('LengthMicrons')))

        # Element indexing raises IndexError when the child is absent; that is
        # the only failure we want to treat as "no attribute label".
        try:
            label = r[0][0].get('Value')
        except IndexError:
            label = None
        if not label:
            # Missing or empty Value: the label lives in the region's Text.
            label = r.get('Text')
        if label is None:
            raise ValueError(
                'Region without a label (no attribute Value and no Text) in %s'
                % (filename,)
            )

        lowered = label.lower()
        for keyword, code in label_codes:
            if keyword in lowered:
                label = code
                break
        labels.append(label)

        # Go through float() so that decimal coordinate strings such as
        # "12.0" are accepted; the fractional part is truncated.
        vertices = r[1]
        coords.append(
            [[int(float(v.get('X'))), int(float(v.get('Y')))] for v in vertices]
        )
    return coords, labels, length, area, pixel_spacing
def saveImage(filename, image_size, coordinates, labels, sample=1):
    """Rasterise labelled polygons into a label mask and save it with ``np.save``.

    A zero-filled ``uint8`` array of shape ``image_size`` is created, each
    polygon is filled with the value selected by its label, and the mask is
    written after keeping every ``sample``-th row and column. When there are
    no polygons an all-zero mask is saved.

    Args:
        filename: Target passed to ``np.save`` (NumPy appends ``.npy`` when
            the name does not already end with it).
        image_size: Shape of the full-resolution mask.
        coordinates: One sequence of ``[x, y]`` vertices per polygon.
        labels: One integer per polygon, used as an index into
            ``[0, 1, 2, 3]``; the mask therefore stores the label value
            itself. Non-integer labels are not supported.
        sample: Step used to subsample both mask axes before saving.
    """
    # Mask value written for each label index (index 0 is the background
    # value the mask is initialised with).
    colors = [0, 1, 2, 3]
    img = np.zeros(image_size, dtype=np.uint8)
    for polygon, label in zip(coordinates, labels):
        # fillImage returns the array it drew on, so rebinding keeps ``img``
        # valid even when the loop body never runs.
        img = fillImage(img, [np.int32(np.stack(polygon))], color=colors[label])
    np.save(filename, img[::sample, ::sample])
if __name__=='__main__':
    # For every annotation XML in the dataset folder: read the matching .svs
    # slide's dimensions, parse the annotated regions and save a label mask
    # of the slide's size next to the slide (same base name).
    folder_name = '/home/ubuntu/thesis/data/ICIA2018/ICIAR2018_BACH_Challenge/WSI/' #path to the dataset folder
    files = findExtension(folder_name)
    # Parsed annotation data of every processed file, in processing order.
    store = []
    for file in files:
        file_name = os.path.splitext(file)[0]
        print('Reading scan',file_name)
        # Only the dimensions are needed, so release the slide handle right away.
        with openslide.OpenSlide(folder_name+file_name+'.svs') as scan:
            dims = scan.dimensions
        # Swap the two reported dimensions to build the mask shape.
        img_size = (dims[1],dims[0])
        print('Generating thumbnail')
        coords,labels,length,area,pixel_spacing = readXML(folder_name+file)
        store += [[coords,labels,length,area,pixel_spacing]]
        saveImage(folder_name+file_name,img_size,coords,labels)