# Dharini Baskaran
# code cleanup
# 0872418
import cv2
import numpy as np
import copy
from pycocotools.coco import COCO
import shapely
from shapely import geometry
import sys
import random
from datetime import datetime
from from_root import from_root
from annotation_builder import AnnotationBuilder as AnnBuild
import pylab
pylab.rcParams['figure.figsize'] = (128.0, 160.0)
from rcnn_model.utils.floorplan_vectorizer_utils import get_image_size, draw_from_coco
# Absolute path to the dataset root; trailing "/" so relative filenames can be appended directly.
data_directory_root = str(from_root("dataset/"))+"/"
# Only annotations with these COCO category ids are processed (presumably the room category — TODO confirm).
category_filter = [2]
# Minimum number of room bounding boxes a random crop must overlap before it is accepted.
image_sample_room_count_threshold = 4
# Side-length bounds (pixels) for randomly sampled crop rectangles.
min_sample_size = 400
max_sample_size = 800
# Number of random crops taken from each mirrored variant of a source image.
samples_per_image = 30
def main():
    """Generate cropped sample datasets for both labelme2coco splits."""
    for split in ("train", "val"):
        sample_from_labelme2coco_dataset(split,
                                         data_directory_root+"annotations/",
                                         "sample_data/",
                                         "validation_images/")
### Core Sampling Logic ###
#sample from dataset cocofile created by labelme2coco
#dataset_name should only be "train", "val", or "dataset" based on labelme2coco's output naming conventions
def sample_from_labelme2coco_dataset(dataset_name,annotation_source_dir,sample_img_dest_dir,validation_img_dest_dir=""):
    """Sample crops from every image of one labelme2coco COCO file.

    dataset_name should be "train", "val", or "dataset" per labelme2coco's
    output naming convention.  Writes <dataset_name>_sampled_data.json next
    to the source annotations; when validation_img_dest_dir is non-empty,
    also renders validation images from the freshly written file.
    """
    # Fresh annotation builder for the sampled dataset.
    builder = AnnBuild()
    builder.set_info("manual annotations of Inovonics and university provided data",
                     "inovonData", "NA", datetime(2019,5,24))
    builder.add_license("TODO", "TODO")

    source_coco = COCO(annotation_source_dir+dataset_name+".json")
    print("Coco Loaded")

    # Sample every image of the source dataset into the builder.
    for image_id in source_coco.getImgIds():
        take_samples_from_image(builder, image_id, source_coco, sample_img_dest_dir)

    sampled_json_path = annotation_source_dir+dataset_name+"_sampled_data.json"
    builder.save_file(sampled_json_path)

    # Optional manual spot-check renders, read back from the saved file.
    if(validation_img_dest_dir != ""):
        validation_images(dataset_name, COCO(sampled_json_path), validation_img_dest_dir)
def take_samples_from_image(ann_builder, img_id, coco, img_dest):
    """Build four mirrored variants of one source image and sample from each.

    Variants with more than 10 filtered annotations get samples_per_image
    random crops; sparser variants are saved whole instead.
    """
    # Load the source image and derive a bare name: drop the leading
    # character and the ".png" suffix, then everything up to the first "/".
    source_img_filename = coco.imgs[img_id]['file_name']
    source_img = cv2.imread(data_directory_root+source_img_filename, cv2.IMREAD_COLOR)
    img_name = source_img_filename[1:-4]
    img_name = img_name[img_name.index("/") + 1:]

    # Identity, horizontal, vertical, and double mirror with matching tags.
    variants = [source_img,
                np.fliplr(source_img),
                np.flipud(source_img),
                np.flipud(np.fliplr(source_img))]
    tags = ["","_h","_v","_hv"]
    print("Processing image "+str(img_id)+": "+img_name)

    for mirror_idx, (img, tag) in enumerate(zip(variants, tags)):
        mirrored_anns = mirror_coco_coordinates(coco, img_id, mirror_idx)
        if len(mirrored_anns) > 10:
            # Dense enough: take many random crops of this variant.
            for sample_idx in range(samples_per_image):
                dest = data_directory_root+img_dest+img_name+tag+"_"+str(sample_idx)+".png"
                take_sample(ann_builder, mirrored_anns, img, dest)
        else:
            # Too few rooms to crop reliably: keep the whole variant.
            dest = data_directory_root+img_dest+img_name+tag+".png"
            take_full_image(ann_builder, mirrored_anns, img, dest)
def take_sample(ann_builder, annotations, img, img_dest_path):
    """Crop one random sample that overlaps enough rooms, save it, and
    register its cropped annotations with ann_builder.

    NOTE(review): the retry loop has no iteration cap — if no crop of this
    image can ever overlap image_sample_room_count_threshold rooms it will
    not terminate; confirm inputs always allow a valid sample.
    """
    sample_anns, cropped, rooms = random_sample_selection(annotations, img)
    # Redraw until the crop overlaps at least the required number of rooms.
    while rooms < image_sample_room_count_threshold:
        sample_anns, cropped, rooms = random_sample_selection(annotations, img)

    # Save the cropped image portion of the final sample.
    cv2.imwrite(img_dest_path, cropped)
    print(" sample saved to "+img_dest_path)

    width, height = get_image_size(img_dest_path)
    sampled_img_id = ann_builder.add_image(img_dest_path, width, height)
    # Clip annotation polygons to the sample rectangle (coordinates were
    # already shifted to the crop's local frame during selection).
    crop_rect = define_crop_area(0, 0, width, height)
    add_cropped_annotations(ann_builder, sampled_img_id, sample_anns, crop_rect)
def take_full_image(ann_builder, annotations, img, img_dest_path):
    """Save the whole (mirrored) image and register all of its annotations
    with ann_builder unmodified."""
    cv2.imwrite(img_dest_path, img)
    print(" whole image saved to "+img_dest_path)
    width, height = get_image_size(img_dest_path)
    new_img_id = ann_builder.add_image(img_dest_path, width, height)
    for annotation in annotations:
        poly = segmentation_to_polygon(annotation["segmentation"])
        ann_builder.add_annotation(new_img_id, annotation["category_id"], poly)
def random_sample_selection(annotations, img):
    """Pick a random crop rectangle from img and shift annotations into it.

    Returns (shifted_annotations, cropped_image, room_count) where
    room_count is how many room bounding boxes overlap the crop.

    Bug fix: the original drew the x-range from len(img) (the ROW count)
    and the y-range from len(img[0]) (the COLUMN count).  For non-square
    images that lets crops run past the image edge — numpy silently clamps
    the slice, so the saved crop and the offset annotations disagree — or
    makes randrange raise ValueError when the image is taller than wide.
    x must be bounded by the column count and y by the row count.
    """
    init_height = len(img)     # number of rows
    init_width = len(img[0])   # number of columns
    # Random top-left corner, leaving room for a minimum-size sample.
    sample_x = random.randrange(0, init_width - min_sample_size - 1)
    sample_y = random.randrange(0, init_height - min_sample_size - 1)
    # Random extent, capped by both max_sample_size and the image edge.
    sample_width = random.randrange(min_sample_size, min(max_sample_size, init_width - sample_x))
    sample_height = random.randrange(min_sample_size, min(max_sample_size, init_height - sample_y))
    # Crop rows by y and columns by x; shift annotation coordinates into
    # the crop's local frame immediately for easier data transfer.
    cropped = img[sample_y:sample_y+sample_height, sample_x:sample_x+sample_width]
    sampled_annotations, room_count = offset_annotation_coordinates(
        annotations, sample_x, sample_y, sample_width, sample_height)
    return sampled_annotations, cropped, room_count
### Annotation Cropping ###
def define_crop_area(x, y, width, height):
    """Return a shapely rectangle whose top-left corner is (x, y)."""
    corners = [(x, y),
               (x + width, y),
               (x + width, y + height),
               (x, y + height)]
    return geometry.Polygon(corners)
def add_cropped_annotations(ann_builder, img_id, annotations, cropped_area):
    """Clip each annotation polygon to cropped_area and record every
    resulting polygonal piece under img_id."""
    for ann in annotations:
        clipped = crop_polygon(ann["segmentation"], cropped_area, ann)
        if isinstance(clipped, geometry.Polygon):
            # Convex (or uncut) room: the intersection stayed in one piece.
            ann_builder.add_annotation(img_id, ann["category_id"], clipped)
        elif isinstance(clipped, (geometry.GeometryCollection, geometry.MultiPolygon)):
            # Concave room split by the crop boundary: keep each polygon piece.
            for piece in clipped.geoms:
                if isinstance(piece, geometry.Polygon):
                    ann_builder.add_annotation(img_id, ann["category_id"], piece)
def crop_polygon(segmentation, crop_area, ann):
    """Clip a COCO segmentation to crop_area.

    Returns the intersection geometry (Polygon, MultiPolygon, or
    GeometryCollection) or None when the source polygon is invalid or the
    intersection has zero area.

    Fix: the third parameter was named ``id``, shadowing the builtin; all
    callers pass it positionally, so the rename is backward-compatible.
    It is only used for diagnostics when the polygon is invalid.
    """
    poly = segmentation_to_polygon(segmentation)
    # Most common invalidity is a self-overlapping outline; log and skip.
    if not shapely.is_valid(poly):
        print(ann)
        print(shapely.is_valid_reason(poly))
        return None
    cropped_poly = shapely.intersection(poly, crop_area)
    # A zero-area intersection means the room lies entirely outside the crop.
    return cropped_poly if cropped_poly.area > 0 else None
def segmentation_to_polygon(segmentation):
    """Convert a flat COCO segmentation [x0, y0, x1, y1, ...] (first ring
    only) into a shapely Polygon."""
    flat = np.asarray(segmentation[0])
    pairs = flat.reshape(-1, 2)
    return geometry.Polygon([(pt[0], pt[1]) for pt in pairs])
### Applying Geometry Modifications
def mirror_coco_coordinates(coco, img_id, mirroring_index):
    """Return deep-copied annotations of img_id with mirrored coordinates.

    mirroring_index: 0 = none, 1 = horizontal, 2 = vertical, 3 = both —
    matching the order of the mirrored image variants.  Only annotations
    whose category is in category_filter are returned; the COCO index
    itself is never mutated.
    """
    width = coco.imgs[img_id]['width']
    height = coco.imgs[img_id]['height']
    mirrored = []
    for ann in copy.deepcopy(coco.imgToAnns[img_id]):
        if ann["category_id"] not in category_filter:
            continue
        bbox = ann['bbox']
        for idx in range(len(bbox)):
            apply_mirroring_to_coord(bbox, idx, mirroring_index, width, height)
        outline = ann['segmentation'][0]
        for idx in range(len(outline)):
            apply_mirroring_to_coord(outline, idx, mirroring_index, width, height)
        mirrored.append(ann)
    return mirrored
def apply_mirroring_to_coord(array, index, mirroring_index, width, height):
    """Mirror a single coordinate in-place.

    Even indices hold x values: flipped (x -> width - x) for horizontal
    mirrors (mirroring_index 1 or 3).  Odd indices hold y values: flipped
    (y -> height - y) for vertical mirrors (mirroring_index 2 or 3).
    """
    is_x = (index % 2 == 0)
    if is_x and mirroring_index % 2 == 1:
        array[index] = width - array[index]
    elif not is_x and mirroring_index > 1:
        array[index] = height - array[index]
def offset_annotation_coordinates(original_annotations,x_offset,y_offset,width,height):
    """Shift annotations into a crop's local coordinate frame.

    Keeps only annotations whose bounding box overlaps the crop rectangle
    (x_offset, y_offset, width, height), translates their bbox and
    segmentation coordinates by (-x_offset, -y_offset), and returns
    (shifted_annotations, overlap_count).  The input list is deep-copied
    and never mutated.
    """
    kept = []
    overlap_count = 0
    for ann in copy.deepcopy(original_annotations):
        if not check_bounding_box_overlap(ann['bbox'], x_offset, y_offset, width, height):
            continue
        overlap_count += 1
        bbox = ann['bbox']
        for idx in range(len(bbox)):
            apply_offset_to_coord(bbox, idx, x_offset, y_offset)
        outline = ann['segmentation'][0]
        for idx in range(len(outline)):
            apply_offset_to_coord(outline, idx, x_offset, y_offset)
        kept.append(ann)
    return kept, overlap_count
def apply_offset_to_coord(array, index, x_offset, y_offset):
    """Translate a single coordinate in-place: x values (even indices)
    move left by x_offset, y values (odd indices) move up by y_offset."""
    array[index] -= x_offset if index % 2 == 0 else y_offset
def check_bounding_box_overlap(bbox, x_offset, y_offset, width, height):
boundary_threshold = 25 #so a row of rooms with a one pixel sliver within the area don't get counted
within_horizontal_bounds = max(bbox[0],bbox[2]) >= x_offset+boundary_threshold and min(bbox[0],bbox[2]) <= x_offset+width-boundary_threshold
within_vertical_bounds = max(bbox[1],bbox[3]) >= y_offset+boundary_threshold and min(bbox[1],bbox[3]) <= y_offset+height-boundary_threshold
return within_horizontal_bounds and within_vertical_bounds
### Validation Display ###
def validation_images(dataset_name,coco,validation_img_target_dir):
    """Render 8 randomly chosen images with their annotations drawn on,
    for manual spot-checking of the sampling output."""
    chosen = np.random.choice(coco.getImgIds(), 8)
    for ordinal, image_id in enumerate(chosen, start=1):
        out_path = (data_directory_root + validation_img_target_dir + dataset_name
                    + "_sampling_validation_" + str(ordinal) + ".png")
        draw_from_coco(image_id, coco, out_path)