|
|
import sys |
|
|
import os |
|
|
import logging |
|
|
import math |
|
|
import random |
|
|
import numpy as np |
|
|
import tensorflow as tf |
|
|
import scipy |
|
|
import skimage.color |
|
|
import skimage.io |
|
|
import skimage.transform |
|
|
import urllib.request |
|
|
import shutil |
|
|
import warnings |
|
|
from distutils.version import LooseVersion |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_bboxes(mask):
    """Derive one tight bounding box per instance mask.

    mask: [height, width, num_instances]. Any non-zero pixel counts as
        part of the instance.

    Returns: int32 array [num_instances, (y1, x1, y2, x2)]. (y2, x2) is one
        past the last occupied row/column. Instances with no occupied pixel
        get an all-zero box.
    """
    instance_count = mask.shape[-1]
    boxes = np.zeros([instance_count, 4], dtype=np.int32)
    for idx in range(instance_count):
        instance = mask[:, :, idx]
        # Columns / rows that contain at least one mask pixel.
        occupied_cols = np.where(np.any(instance, axis=0))[0]
        occupied_rows = np.where(np.any(instance, axis=1))[0]
        if not occupied_cols.shape[0]:
            # Empty mask: the row is already zero-initialised.
            continue
        left, right = occupied_cols[[0, -1]]
        top, bottom = occupied_rows[[0, -1]]
        # The far edge is exclusive, hence the +1.
        boxes[idx] = np.array([top, left, bottom + 1, right + 1])
    return boxes.astype(np.int32)
|
|
|
|
|
|
|
|
def compute_iou(box, boxes, box_area, boxes_area):
    """Intersection-over-union of one box against an array of boxes.

    box: 1D vector [y1, x1, y2, x2]
    boxes: [boxes_count, (y1, x1, y2, x2)]
    box_area: area of 'box'
    boxes_area: array of length boxes_count with the area of each row
        of 'boxes'.

    The areas are supplied by the caller so they can be computed once and
    reused across many calls.

    Returns: array of length boxes_count with the IoU values.
    """
    # Corners of the overlapping rectangle (may be inverted if disjoint).
    top = np.maximum(box[0], boxes[:, 0])
    bottom = np.minimum(box[2], boxes[:, 2])
    left = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[3], boxes[:, 3])
    # Clamp negative extents to zero so disjoint boxes yield no overlap.
    overlap_w = np.maximum(right - left, 0)
    overlap_h = np.maximum(bottom - top, 0)
    intersection = overlap_w * overlap_h
    union = box_area + boxes_area[:] - intersection[:]
    return intersection / union
|
|
|
|
|
|
|
|
def compute_overlaps(boxes1, boxes2):
    """Pairwise IoU between two sets of boxes.

    boxes1, boxes2: [N, (y1, x1, y2, x2)].

    The loop runs once per row of boxes2, so passing the larger set as
    boxes1 and the smaller as boxes2 keeps the iteration count low.

    Returns: array [len(boxes1), len(boxes2)] of IoU values.
    """
    def _areas(rects):
        return (rects[:, 2] - rects[:, 0]) * (rects[:, 3] - rects[:, 1])

    area1 = _areas(boxes1)
    area2 = _areas(boxes2)

    overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
    # Fill one column at a time: IoU of a single boxes2 row vs all of boxes1.
    for column, (candidate, candidate_area) in enumerate(zip(boxes2, area2)):
        overlaps[:, column] = compute_iou(candidate, boxes1,
                                          candidate_area, area1)
    return overlaps
|
|
|
|
|
|
|
|
def compute_overlaps_masks(masks1, masks2):
    """Pairwise IoU between two sets of masks.

    masks1, masks2: [Height, Width, instances]. Values above 0.5 are
        treated as mask pixels.

    Returns: array [instances1, instances2] of IoU values. If either set
        has no instances, an all-zero array of that shape is returned.
    """
    count1 = masks1.shape[-1]
    count2 = masks2.shape[-1]
    if count1 == 0 or count2 == 0:
        return np.zeros((count1, count2))

    # Binarise and flatten each mask into a column of 0.0 / 1.0 values.
    flat1 = np.reshape(masks1 > .5, (-1, count1)).astype(np.float32)
    flat2 = np.reshape(masks2 > .5, (-1, count2)).astype(np.float32)
    area1 = np.sum(flat1, axis=0)
    area2 = np.sum(flat2, axis=0)

    # The dot product of two 0/1 columns counts their shared pixels.
    intersections = np.dot(flat1.T, flat2)
    union = area1[:, None] + area2[None, :] - intersections
    return intersections / union
|
|
|
|
|
|
|
|
def non_max_suppression(boxes, scores, threshold):
    """Greedy non-maximum suppression; returns indices of the kept boxes.

    boxes: [N, (y1, x1, y2, x2)]. (y2, x2) lies outside the box.
    scores: 1-D array of box scores.
    threshold: Float. Boxes whose IoU with an already-kept box exceeds
        this value are dropped.

    Returns: int32 array of indices into 'boxes', in descending score order.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    area = heights * widths

    # Candidate indices, best score first.
    order = scores.argsort()[::-1]

    keep = []
    while len(order) > 0:
        best, rest = order[0], order[1:]
        keep.append(best)
        iou = compute_iou(boxes[best], boxes[rest], area[best], area[rest])
        # Retain only the candidates that do not overlap 'best' too much.
        order = rest[~(iou > threshold)]
    return np.array(keep, dtype=np.int32)
|
|
|
|
|
|
|
|
def apply_box_deltas(boxes, deltas):
    """Shift and rescale boxes by the given refinement deltas.

    boxes: [N, (y1, x1, y2, x2)]. (y2, x2) is outside the box.
    deltas: [N, (dy, dx, log(dh), log(dw))]

    Returns: float32 array [N, (y1, x1, y2, x2)] of refined boxes.
    """
    boxes = boxes.astype(np.float32)
    top, left = boxes[:, 0], boxes[:, 1]

    # Switch to centre/size representation.
    height = boxes[:, 2] - top
    width = boxes[:, 3] - left
    center_y = top + 0.5 * height
    center_x = left + 0.5 * width

    # Centre offsets are expressed relative to the *original* size, so they
    # must be applied before the size itself is rescaled. The in-place
    # operators keep everything in float32.
    center_y += deltas[:, 0] * height
    center_x += deltas[:, 1] * width
    height *= np.exp(deltas[:, 2])
    width *= np.exp(deltas[:, 3])

    # Back to corner representation.
    new_top = center_y - 0.5 * height
    new_left = center_x - 0.5 * width
    corners = [new_top, new_left, new_top + height, new_left + width]
    return np.stack(corners, axis=1)
|
|
|
|
|
|
|
|
def box_refinement_graph(box, gt_box):
    """TensorFlow version of the box -> gt_box refinement computation.

    box and gt_box are [N, (y1, x1, y2, x2)]. Both are cast to float32.

    Returns: tensor [N, (dy, dx, log(dh), log(dw))].
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    # Source boxes in centre/size form.
    src_h = box[:, 2] - box[:, 0]
    src_w = box[:, 3] - box[:, 1]
    src_cy = box[:, 0] + 0.5 * src_h
    src_cx = box[:, 1] + 0.5 * src_w

    # Target boxes in centre/size form.
    dst_h = gt_box[:, 2] - gt_box[:, 0]
    dst_w = gt_box[:, 3] - gt_box[:, 1]
    dst_cy = gt_box[:, 0] + 0.5 * dst_h
    dst_cx = gt_box[:, 1] + 0.5 * dst_w

    # Centre shift is normalised by source size; size change is a log ratio.
    dy = (dst_cy - src_cy) / src_h
    dx = (dst_cx - src_cx) / src_w
    dh = tf.log(dst_h / src_h)
    dw = tf.log(dst_w / src_w)

    return tf.stack([dy, dx, dh, dw], axis=1)
|
|
|
|
|
|
|
|
def box_refinement(box, gt_box):
    """NumPy computation of the deltas that map box onto gt_box.

    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is assumed to be
    outside the box. Both are converted to float32.

    Returns: array [N, (dy, dx, log(dh), log(dw))].
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    # Source boxes in centre/size form.
    src_h = box[:, 2] - box[:, 0]
    src_w = box[:, 3] - box[:, 1]
    src_cy = box[:, 0] + 0.5 * src_h
    src_cx = box[:, 1] + 0.5 * src_w

    # Target boxes in centre/size form.
    dst_h = gt_box[:, 2] - gt_box[:, 0]
    dst_w = gt_box[:, 3] - gt_box[:, 1]
    dst_cy = gt_box[:, 0] + 0.5 * dst_h
    dst_cx = gt_box[:, 1] + 0.5 * dst_w

    # Centre shift is normalised by source size; size change is a log ratio.
    deltas = [
        (dst_cy - src_cy) / src_h,
        (dst_cx - src_cx) / src_w,
        np.log(dst_h / src_h),
        np.log(dst_w / src_w),
    ]
    return np.stack(deltas, axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Dataset(object):
    """Base class for datasets.

    Subclass it and add the loading logic specific to your data, e.g.:

    class CatsAndDogsDataset(Dataset):
        def load_cats_and_dogs(self):
            ...
        def load_mask(self, image_id):
            ...
        def image_reference(self, image_id):
            ...

    Classes and images are registered with add_class() / add_image();
    prepare() must then be called to build the lookup tables.
    """

    def __init__(self, class_map=None):
        self._image_ids = []
        self.image_info = []
        # The background class is always registered first, at index 0.
        self.class_info = [{"source": "", "id": 0, "name": "BG"}]
        self.source_class_ids = {}

    def add_class(self, source, class_id, class_name):
        """Register a class; a repeated (source, class_id) pair is ignored."""
        assert "." not in source, "Source name cannot contain a dot"

        already_registered = any(
            info['source'] == source and info["id"] == class_id
            for info in self.class_info)
        if already_registered:
            return

        entry = {
            "source": source,
            "id": class_id,
            "name": class_name,
        }
        self.class_info.append(entry)

    def add_image(self, source, image_id, path, **kwargs):
        """Register an image; extra keyword arguments are stored with it."""
        record = {
            "id": image_id,
            "source": source,
            "path": path,
        }
        record.update(kwargs)
        self.image_info.append(record)

    def image_reference(self, image_id):
        """Return a link or description that helps locate the image.

        The base implementation returns an empty string. Override it in
        your dataset, and delegate to this method for images that do not
        belong to your dataset.
        """
        return ""

    def prepare(self, class_map=None):
        """Build the derived lookup tables after classes/images are added.

        TODO: class map is not supported yet. When done, it should handle
        mapping classes from different datasets to the same class ID.
        """

        def clean_name(name):
            """Keep only the part of the name before the first comma."""
            return name.split(",", 1)[0]

        # Internal IDs are simply positions in class_info / image_info.
        self.num_classes = len(self.class_info)
        self.class_ids = np.arange(self.num_classes)
        self.class_names = [clean_name(c["name"]) for c in self.class_info]
        self.num_images = len(self.image_info)
        self._image_ids = np.arange(self.num_images)

        # "<source>.<source id>" -> internal ID.
        self.class_from_source_map = {
            "{}.{}".format(info['source'], info['id']): internal_id
            for info, internal_id in zip(self.class_info, self.class_ids)}
        self.image_from_source_map = {
            "{}.{}".format(info['source'], info['id']): internal_id
            for info, internal_id in zip(self.image_info, self.image_ids)}

        # For every source, the internal class IDs it provides. The
        # background class (index 0) is included for every source.
        self.sources = list(set([i['source'] for i in self.class_info]))
        self.source_class_ids = {}
        for source in self.sources:
            self.source_class_ids[source] = [
                index for index, info in enumerate(self.class_info)
                if index == 0 or source == info['source']]

    def map_source_class_id(self, source_class_id):
        """Translate a "<source>.<id>" string into the internal class ID.

        For example:
        dataset.map_source_class_id("coco.12") -> 23
        """
        return self.class_from_source_map[source_class_id]

    def get_source_class_id(self, class_id, source):
        """Return the source dataset's ID for an internal class ID."""
        info = self.class_info[class_id]
        assert info['source'] == source
        return info['id']

    @property
    def image_ids(self):
        return self._image_ids

    def source_image_link(self, image_id):
        """Return the stored path of the image.

        Override to return a URL when the image is available online, which
        makes debugging easier.
        """
        return self.image_info[image_id]["path"]

    def load_image(self, image_id):
        """Read the image from its stored path and return it as [H,W,3]."""
        image = skimage.io.imread(self.image_info[image_id]['path'])
        # Grayscale images are expanded to three channels.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # A fourth (alpha) channel is dropped.
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image

    def load_mask(self, image_id):
        """Load the instance masks of an image.

        Datasets store masks in different ways, so subclasses are expected
        to override this method. This default logs a warning and returns
        empty arrays.

        Returns:
        masks: array of shape [height, width, instance count] with one
            binary mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        logging.warning("You are using the default load_mask(), maybe you need to define your own one.")
        empty_mask = np.empty([0, 0, 0])
        empty_class_ids = np.empty([0], np.int32)
        return empty_mask, empty_class_ids
|
|
|
|
|
|
|
|
def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
    """Resize an image while preserving its aspect ratio.

    min_dim: if provided, the image is scaled up so that its smaller side
        is at least min_dim (it is never scaled down because of min_dim).
    max_dim: if provided (square mode only), the scale is reduced so the
        longest side doesn't exceed this value.
    min_scale: if provided, the scale factor is raised to at least this
        value even if min_dim doesn't require it.
    mode: Resizing mode.
        none: No resizing. Return the image unchanged.
        square: Resize and pad with zeros to get a square image
            of size [max_dim, max_dim].
        pad64: Pads width and height with zeros to make them multiples
            of 64. If min_dim or min_scale are provided, it scales the
            image up before padding. max_dim is ignored in this mode.
        crop: Scales the image based on min_dim and min_scale, then picks
            a random crop of size min_dim x min_dim. max_dim is not used
            in this mode.

    Returns:
    image: the resized image, cast back to the input dtype
    window: (y1, x1, y2, x2). Location of the actual image content inside
        the returned image (i.e. excluding padding). The x2, y2 pixels
        are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
    crop: (y, x, height, width) of the random crop in crop mode,
        otherwise None
    """
    original_dtype = image.dtype

    # Defaults describing "nothing was changed".
    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1
    padding = [(0, 0), (0, 0), (0, 0)]
    crop = None

    if mode == "none":
        return image, window, scale, padding, crop

    # Work out the scale factor.
    if min_dim:
        # Only ever scale up to reach min_dim, never down.
        scale = max(1, min_dim / min(h, w))
    if min_scale and scale < min_scale:
        scale = min_scale

    if max_dim and mode == "square":
        longest_side = max(h, w)
        if round(longest_side * scale) > max_dim:
            scale = max_dim / longest_side

    if scale != 1:
        image = resize(image, (round(h * scale), round(w * scale)),
                       preserve_range=True)

    def _split(total):
        """Divide a padding amount into (before, after) halves."""
        before = total // 2
        return before, total - before

    # Size after scaling; padding / cropping is relative to this.
    h, w = image.shape[:2]
    if mode == "square":
        top_pad, bottom_pad = _split(max_dim - h)
        left_pad, right_pad = _split(max_dim - w)
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "pad64":
        assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
        # (-n) % 64 is the distance from n up to the next multiple of 64
        # (zero when n is already a multiple).
        top_pad, bottom_pad = _split((-h) % 64)
        left_pad, right_pad = _split((-w) % 64)
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "crop":
        # Random top-left corner such that the crop fits inside the image.
        y = random.randint(0, (h - min_dim))
        x = random.randint(0, (w - min_dim))
        crop = (y, x, min_dim, min_dim)
        image = image[y:y + min_dim, x:x + min_dim]
        window = (0, 0, min_dim, min_dim)
    else:
        raise Exception("Mode {} not supported".format(mode))
    return image.astype(original_dtype), window, scale, padding, crop
|
|
|
|
|
|
|
|
def resize_mask(mask, scale, padding, crop=None):
    """Apply a scale and then either a crop or a padding to a mask.

    The scale, padding and crop are typically the values returned by
    resize_image(), so that image and mask are transformed consistently.

    mask: [height, width, instances]
    scale: mask scaling factor, applied to height and width only
    padding: Padding to add to the mask in the form
        [(top, bottom), (left, right), (0, 0)]. Ignored when crop is given.
    crop: optional (y, x, height, width) region to cut out after scaling.
    """
    # The instance axis must not be scaled.
    zoom_factors = [scale, scale, 1]
    # Warnings raised by zoom() are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scaled = scipy.ndimage.zoom(mask, zoom=zoom_factors, order=0)
    if crop is None:
        return np.pad(scaled, padding, mode='constant', constant_values=0)
    top, left, height, width = crop
    return scaled[top:top + height, left:left + width]
|
|
|
|
|
|
|
|
def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to reduce memory load.

    Each instance mask is cropped to its bounding box and the crop is
    resized to mini_shape. Mini-masks can be resized back to image scale
    using expand_mask().

    bbox: [instances, (y1, x1, y2, x2, ...)]. Only the first four values
        of each row are used, as slice bounds into the mask.
    mask: [height, width, instances]
    mini_shape: (height, width) tuple of the mini-mask size.

    Returns: bool array [mini_height, mini_width, instances].

    Raises: Exception if a bounding box selects an empty region.
    """
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i].astype(bool)
        y1, x1, y2, x2 = bbox[i][:4]
        m = m[y1:y2, x1:x2]
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        m = resize(m, mini_shape)
        # Round the interpolated values back to a binary mask. The builtin
        # bool is used because the np.bool alias was removed in NumPy 1.24.
        mini_mask[:, :, i] = np.around(m).astype(bool)
    return mini_mask
|
|
|
|
|
|
|
|
def expand_mask(bbox, mini_mask, image_shape):
    """Resize mini masks back to image size. Reverses the change
    of minimize_mask().

    bbox: [instances, (y1, x1, y2, x2, ...)]. Only the first four values
        of each row are used, as slice bounds into the full-size mask.
    mini_mask: [mini_height, mini_width, instances]
    image_shape: tuple whose first two entries are (height, width) of the
        full image.

    Returns: bool array [height, width, instances].
    """
    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        # Stretch the mini mask to the size of its bounding box.
        m = resize(m, (h, w))
        # Round the interpolated values back to a binary mask. The builtin
        # bool is used because the np.bool alias was removed in NumPy 1.24.
        mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)
    return mask
|
|
|
|
|
|
|
|
|
|
|
def mold_mask(mask, config):
    """Placeholder: performs no work and returns None."""
    return None
|
|
|
|
|
|
|
|
def unmold_mask(mask, bbox, image_shape):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.

    mask: [height, width] of type float. A small, typically 28x28 mask.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.
    image_shape: shape of the original image; only the first two entries
        (height, width) are used.

    Returns a binary (bool) mask with the same size as the original image.
    """
    threshold = 0.5
    y1, x1, y2, x2 = bbox
    # Stretch the small mask to the size of its box, then binarise it.
    # The builtin bool is used because the np.bool alias was removed in
    # NumPy 1.24.
    mask = resize(mask, (y2 - y1, x2 - x1))
    mask = np.where(mask >= threshold, 1, 0).astype(bool)

    # Paste the box-sized mask into an otherwise empty full-size mask.
    full_mask = np.zeros(image_shape[:2], dtype=bool)
    full_mask[y1:y2, x1:x2] = mask
    return full_mask
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Generate anchor boxes for one feature map.

    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
        to generate anchors.
    feature_stride: Stride of the feature map relative to the image in
        pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if
        the value is 2 then generate anchors for every other feature map
        pixel.

    Returns: array [N, (y1, x1, y2, x2)] of anchor boxes.
    """
    # Every (scale, ratio) combination.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    flat_scales = scale_grid.flatten()
    flat_ratios = ratio_grid.flatten()

    # Anchor sizes for each combination.
    heights = flat_scales / np.sqrt(flat_ratios)
    widths = flat_scales * np.sqrt(flat_ratios)

    # Anchor centre positions, converted to image pixels.
    shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
    shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
    grid_x, grid_y = np.meshgrid(shifts_x, shifts_y)

    # Every (position, size) combination.
    box_widths, box_centers_x = np.meshgrid(widths, grid_x)
    box_heights, box_centers_y = np.meshgrid(heights, grid_y)

    # Flatten into lists of (y, x) centres and (h, w) sizes.
    centers = np.stack(
        [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
    sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])

    # Centre/size -> corner coordinates.
    top_left = centers - 0.5 * sizes
    bottom_right = centers + 0.5 * sizes
    return np.concatenate([top_left, bottom_right], axis=1)
|
|
|
|
|
|
|
|
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors for every level of a feature pyramid.

    Level i uses scales[i], feature_shapes[i] and feature_strides[i]; the
    full list of ratios is used at every level.

    Returns:
    anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array,
        ordered by level: anchors of scales[0] come first, then those of
        scales[1], and so on.
    """
    per_level = [
        generate_anchors(scales[level], ratios, feature_shapes[level],
                         feature_strides[level], anchor_stride)
        for level in range(len(scales))
    ]
    return np.concatenate(per_level, axis=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def trim_zeros(x):
    """Drop the rows of a 2D array that consist entirely of zeros.

    Arrays are often allocated larger than the available data and padded
    with zeros; this removes that padding.

    x: [rows, columns].
    """
    assert len(x.shape) == 2
    # A row is kept as soon as it has any non-zero entry.
    has_data = np.any(x != 0, axis=1)
    return x[has_data]
|
|
|
|
|
|
|
|
def compute_matches(gt_boxes, gt_class_ids, gt_masks,
                    pred_boxes, pred_class_ids, pred_scores, pred_masks,
                    iou_threshold=0.5, score_threshold=0.0):
    """Match predicted instances to ground truth instances by mask IoU.

    Returns:
    gt_match: 1-D array. For each GT box it has the index of the matched
        predicted box, or -1 if unmatched.
    pred_match: 1-D array. For each predicted box, it has the index of
        the matched ground truth box, or -1 if unmatched.
    overlaps: [pred_boxes, gt_boxes] IoU overlaps.

    Prediction indices refer to the predictions after sorting by
    descending score.
    """
    # Strip zero padding rows and the corresponding masks / scores.
    gt_boxes = trim_zeros(gt_boxes)
    gt_masks = gt_masks[..., :gt_boxes.shape[0]]
    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]

    # Process predictions from highest to lowest score.
    by_score = np.argsort(pred_scores)[::-1]
    pred_boxes = pred_boxes[by_score]
    pred_class_ids = pred_class_ids[by_score]
    pred_scores = pred_scores[by_score]
    pred_masks = pred_masks[..., by_score]

    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    # -1 marks "no match".
    pred_match = -1 * np.ones([pred_boxes.shape[0]])
    gt_match = -1 * np.ones([gt_boxes.shape[0]])
    for pred_idx in range(len(pred_boxes)):
        row = overlaps[pred_idx]
        # GT candidates, best overlap first.
        candidates = np.argsort(row)[::-1]
        # Cut the candidate list at the first overlap below score_threshold.
        too_low = np.where(row[candidates] < score_threshold)[0]
        if too_low.size > 0:
            candidates = candidates[:too_low[0]]

        for gt_idx in candidates:
            # Each GT instance can be claimed only once.
            if gt_match[gt_idx] > -1:
                continue
            # Candidates are sorted, so nothing further can reach the
            # IoU threshold either.
            if row[gt_idx] < iou_threshold:
                break
            if pred_class_ids[pred_idx] == gt_class_ids[gt_idx]:
                gt_match[gt_idx] = pred_idx
                pred_match[pred_idx] = gt_idx
                break

    return gt_match, pred_match, overlaps
|
|
|
|
|
|
|
|
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Returns:
    mAP: the average precision value
    precisions: List of precisions at different class score thresholds,
        padded with a sentinel at each end.
    recalls: List of recall values at different class score thresholds,
        padded with a sentinel at each end.
    overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    gt_match, pred_match, overlaps = compute_matches(
        gt_boxes, gt_class_ids, gt_masks,
        pred_boxes, pred_class_ids, pred_scores, pred_masks,
        iou_threshold)

    # Running count of matched predictions at each prediction rank.
    hits = np.cumsum(pred_match > -1)
    precisions = hits / (np.arange(len(pred_match)) + 1)
    recalls = hits.astype(np.float32) / len(gt_match)

    # Sentinel values at both ends simplify the area computation.
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Make precision monotonically non-increasing: each entry becomes the
    # maximum of itself and everything to its right.
    precisions = np.maximum.accumulate(precisions[::-1])[::-1]

    # Sum precision * recall-step over the points where recall changes.
    steps = np.where(recalls[:-1] != recalls[1:])[0] + 1
    mAP = np.sum((recalls[steps] - recalls[steps - 1]) *
                 precisions[steps])

    return mAP, precisions, recalls, overlaps
|
|
|
|
|
|
|
|
def compute_ap_range(gt_box, gt_class_id, gt_mask,
                     pred_box, pred_class_id, pred_score, pred_mask,
                     iou_thresholds=None, verbose=1):
    """Compute AP over a range of IoU thresholds. Default range is 0.5-0.95.

    iou_thresholds: sequence of IoU thresholds. When None or empty, the
        thresholds 0.5, 0.55, ..., 0.95 are used.
    verbose: if truthy, print the AP at each threshold and the mean.

    Returns: the mean of the per-threshold AP values.
    """
    # Test explicitly for "not provided": applying `or` to a NumPy array
    # with more than one element raises ValueError (ambiguous truth value).
    if iou_thresholds is None or len(iou_thresholds) == 0:
        iou_thresholds = np.arange(0.5, 1.0, 0.05)

    AP = []
    for iou_threshold in iou_thresholds:
        ap, precisions, recalls, overlaps =\
            compute_ap(gt_box, gt_class_id, gt_mask,
                       pred_box, pred_class_id, pred_score, pred_mask,
                       iou_threshold=iou_threshold)
        if verbose:
            print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
        AP.append(ap)
    AP = np.array(AP).mean()
    if verbose:
        print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
            iou_thresholds[0], iou_thresholds[-1], AP))
    return AP
|
|
|
|
|
|
|
|
def compute_recall(pred_boxes, gt_boxes, iou):
    """Compute the recall at the given IoU threshold, i.e. the fraction of
    GT boxes that are the best match of at least one sufficiently
    overlapping prediction.

    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates

    Returns:
    recall: number of distinct matched GT boxes divided by the GT count
    positive_ids: indices of the predictions whose best IoU is >= iou
    """
    overlaps = compute_overlaps(pred_boxes, gt_boxes)
    # Best-matching GT box (and its IoU) for every prediction.
    best_iou = np.max(overlaps, axis=1)
    best_gt = np.argmax(overlaps, axis=1)
    positive_ids = np.where(best_iou >= iou)[0]
    # Several predictions may hit the same GT box; count each GT once.
    distinct_gt = np.unique(best_gt[positive_ids])

    recall = len(distinct_gt) / gt_boxes.shape[0]
    return recall, positive_ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Run a single-instance graph function over a batch.

    Each input is indexed along its first dimension, graph_fn is called
    once per index, and the per-index outputs are stacked back together.

    inputs: list of tensors (a single tensor is wrapped in a list). All
        must have the same first dimension length.
    graph_fn: A function that returns a TF tensor (or a tuple/list of
        tensors) that's part of a graph.
    batch_size: number of slices to divide the data into.
    names: If provided, assigns names to the resulting tensors.

    Returns: a list with one stacked tensor per output of graph_fn, or
        the tensor itself when there is exactly one output.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    per_slice = []
    for index in range(batch_size):
        slice_out = graph_fn(*[tensor[index] for tensor in inputs])
        if not isinstance(slice_out, (tuple, list)):
            slice_out = [slice_out]
        per_slice.append(slice_out)

    # Transpose from "list of slices, each a list of outputs" to
    # "list of outputs, each a tuple of slices".
    per_output = list(zip(*per_slice))

    if names is None:
        names = [None] * len(per_output)

    stacked = [tf.stack(parts, axis=0, name=label)
               for parts, label in zip(per_output, names)]
    return stacked[0] if len(stacked) == 1 else stacked
|
|
|
|
|
|
|
|
def download_trained_weights(coco_model_path, verbose=1):
    """Download the weights file at COCO_MODEL_URL to a local path.

    coco_model_path: local path the downloaded weights are written to
    verbose: when greater than 0, progress messages are printed
    """
    announce = verbose > 0
    if announce:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    # Stream the response straight into the destination file.
    with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if announce:
        print("... done downloading pretrained model!")
|
|
|
|
|
|
|
|
def norm_boxes(boxes, shape):
    """Convert boxes from pixel coordinates to normalized coordinates.

    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
    shape: (height, width) in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in
    normalized coordinates it's inside the box.

    Returns:
        float32 array [N, (y1, x1, y2, x2)] in normalized coordinates
    """
    height, width = shape
    last_row, last_col = height - 1, width - 1
    scale = np.array([last_row, last_col, last_row, last_col])
    # Pull the exclusive (y2, x2) corner back onto the last inside pixel.
    shift = np.array([0, 0, 1, 1])
    normalized = (boxes - shift) / scale
    return normalized.astype(np.float32)
|
|
|
|
|
|
|
|
def denorm_boxes(boxes, shape):
    """Convert boxes from normalized coordinates to pixel coordinates.

    boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
    shape: (height, width) in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in
    normalized coordinates it's inside the box.

    Returns:
        int32 array [N, (y1, x1, y2, x2)] in pixel coordinates
    """
    height, width = shape
    last_row, last_col = height - 1, width - 1
    scale = np.array([last_row, last_col, last_row, last_col])
    # Push the (y2, x2) corner one pixel out so it is exclusive again.
    shift = np.array([0, 0, 1, 1])
    pixels = boxes * scale + shift
    return np.around(pixels).astype(np.int32)
|
|
|
|
|
|
|
|
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """Version-aware wrapper around skimage.transform.resize().

    The anti_aliasing and anti_aliasing_sigma arguments are forwarded only
    when the installed scikit-image is version 0.14 or newer; for older
    versions they are left out of the call. Wrapping the call here also
    gives the module a single place to control resizing defaults.
    """
    options = dict(order=order, mode=mode, cval=cval, clip=clip,
                   preserve_range=preserve_range)
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        options.update(anti_aliasing=anti_aliasing,
                       anti_aliasing_sigma=anti_aliasing_sigma)
    return skimage.transform.resize(image, output_shape, **options)