iasjkk committed on
Commit
45cd6a0
·
verified ·
1 Parent(s): 9c5396c

Upload 6 files

Browse files
Files changed (6) hide show
  1. __init__.py +1 -0
  2. config.py +236 -0
  3. model.py +0 -0
  4. parallel_model.py +175 -0
  5. utils.py +908 -0
  6. visualize.py +500 -0
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
config.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Base Configurations class.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import numpy as np
11
+
12
+
13
+ # Base Configuration Class
14
+ # Don't use this class directly. Instead, sub-class it and override
15
+ # the configurations you need to change.
16
+
17
class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.

    Note: computed attributes (BATCH_SIZE, IMAGE_SHAPE, IMAGE_META_SIZE)
    are derived in __init__, so sub-classes should override the class
    attributes *before* instantiation.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. Tensorboard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used for R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None: Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True: (don't use). Set layer in training mode even when predicting
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size: in "crop" mode the network input is the crop
        # (IMAGE_MIN_DIM square); all other modes pad/scale to IMAGE_MAX_DIM.
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
                                         self.IMAGE_CHANNEL_COUNT])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
                                         self.IMAGE_CHANNEL_COUNT])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")
model.py ADDED
The diff for this file is too large to render. See raw diff
 
parallel_model.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Multi-GPU Support for Keras.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+
9
+ Ideas and a small code snippets from these sources:
10
+ https://github.com/fchollet/keras/issues/2436
11
+ https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12
+ https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13
+ https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14
+ """
15
+
16
+ import tensorflow as tf
17
+ import keras.backend as K
18
+ import keras.layers as KL
19
+ import keras.models as KM
20
+
21
+
22
class ParallelModel(KM.Model):
    """Subclasses the standard Keras Model and adds multi-GPU support.
    It works by creating a copy of the model on each GPU. Then it slices
    the inputs and sends a slice to each copy of the model, and then
    merges the outputs together and applies the loss on the combined
    outputs.
    """

    def __init__(self, keras_model, gpu_count):
        """Class constructor.
        keras_model: The Keras model to parallelize
        gpu_count: Number of GPUs. Must be > 1
        """
        # Attributes are set before super().__init__ because make_parallel()
        # reads them to build the replicated graph.
        self.inner_model = keras_model
        self.gpu_count = gpu_count
        merged_outputs = self.make_parallel()
        super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
                                            outputs=merged_outputs)

    def __getattribute__(self, attrname):
        """Redirect loading and saving methods to the inner model. That's where
        the weights are stored. Any attribute whose name contains 'load' or
        'save' is looked up on inner_model instead of this wrapper."""
        if 'load' in attrname or 'save' in attrname:
            return getattr(self.inner_model, attrname)
        return super(ParallelModel, self).__getattribute__(attrname)

    def summary(self, *args, **kwargs):
        """Override summary() to display summaries of both, the wrapper
        and inner models."""
        super(ParallelModel, self).summary(*args, **kwargs)
        self.inner_model.summary(*args, **kwargs)

    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.

        Returns the list of merged output tensors (one per inner-model
        output), built by running each input slice through a replica.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        # NOTE(review): no explicit tf.device('/cpu:0') wraps this split;
        # placement falls to TF's default placer — confirm intent.
        input_slices = {name: tf.split(x, self.gpu_count)
                        for name, x in zip(self.inner_model.input_names,
                                           self.inner_model.inputs)}

        output_names = self.inner_model.output_names
        # One bucket per inner-model output; each bucket collects that
        # output from every GPU replica for merging below.
        outputs_all = []
        for i in range(len(self.inner_model.outputs)):
            outputs_all.append([])

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica.
                    # The Lambda is applied to `tensor` immediately, so
                    # `name` and `i` are read at graph-construction time
                    # with their current loop values.
                    zipped_inputs = zip(self.inner_model.input_names,
                                        self.inner_model.inputs)
                    inputs = [
                        KL.Lambda(lambda s: input_slices[name][i],
                                  output_shape=lambda s: (None,) + s[1:])(tensor)
                        for name, tensor in zipped_inputs]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # Concatenate or average outputs?
                # Outputs usually have a batch dimension and we concatenate
                # across it. If they don't, then the output is likely a loss
                # or a metric value that gets averaged across the batch.
                # Keras expects losses and metrics to be scalars.
                if K.int_shape(outputs[0]) == ():
                    # Average
                    m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs)
                else:
                    # Concatenate
                    m = KL.Concatenate(axis=0, name=name)(outputs)
                merged.append(m)
        return merged
105
+
106
+
107
if __name__ == "__main__":
    # Testing code below. It creates a simple model to train on MNIST and
    # tries to run it on 2 GPUs. It saves the graph so it can be viewed
    # in TensorBoard. Run it as:
    #
    # python3 parallel_model.py

    import os
    import numpy as np
    import keras.optimizers
    from keras.datasets import mnist
    from keras.preprocessing.image import ImageDataGenerator

    GPU_COUNT = 2

    # Root directory of the project
    ROOT_DIR = os.path.abspath("../")

    # Directory to save logs and trained model
    MODEL_DIR = os.path.join(ROOT_DIR, "logs")

    def build_model(x_train, num_classes):
        """Build a small CNN digit classifier sized to x_train's images."""
        # Reset default graph. Keras leaves old ops in the graph,
        # which are ignored for execution but clutter graph
        # visualization in TensorBoard.
        tf.reset_default_graph()

        inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
        x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
                      name="conv1")(inputs)
        x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
                      name="conv2")(x)
        x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
        x = KL.Flatten(name="flat1")(x)
        x = KL.Dense(128, activation='relu', name="dense1")(x)
        x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)

        return KM.Model(inputs, x, "digit_classifier_model")

    # Load MNIST Data and scale pixel values to [0, 1]; expand_dims adds
    # the trailing channel axis Keras Conv2D expects.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.expand_dims(x_train, -1).astype('float32') / 255
    x_test = np.expand_dims(x_test, -1).astype('float32') / 255

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # Build data generator and model
    datagen = ImageDataGenerator()
    model = build_model(x_train, 10)

    # Add multi-GPU support.
    model = ParallelModel(model, GPU_COUNT)

    optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy'])

    model.summary()

    # Train
    model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=64),
        steps_per_epoch=50, epochs=10, verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
                                               write_graph=True)]
    )
utils.py ADDED
@@ -0,0 +1,908 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Common utility functions and classes.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ import logging
13
+ import math
14
+ import random
15
+ import numpy as np
16
+ import tensorflow as tf
17
+ import scipy
18
+ import skimage.color
19
+ import skimage.io
20
+ import skimage.transform
21
+ import urllib.request
22
+ import shutil
23
+ import warnings
24
+ from distutils.version import LooseVersion
25
+
26
+ # URL from which to download the latest COCO trained weights
27
+ COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
28
+
29
+
30
+ ############################################################
31
+ # Bounding Boxes
32
+ ############################################################
33
+
34
def extract_bboxes(mask):
    """Compute bounding boxes from masks.
    mask: [height, width, num_instances]. Mask pixels are either 1 or 0.

    Returns: bbox array [num_instances, (y1, x1, y2, x2)].
    """
    num_instances = mask.shape[-1]
    boxes = np.zeros([num_instances, 4], dtype=np.int32)
    for idx in range(num_instances):
        instance = mask[:, :, idx]
        # Columns/rows that contain at least one mask pixel.
        cols = np.where(np.any(instance, axis=0))[0]
        rows = np.where(np.any(instance, axis=1))[0]
        if cols.shape[0]:
            # (y2, x2) are exclusive, hence the +1 on the last index.
            x1, x2 = cols[0], cols[-1] + 1
            y1, y2 = rows[0], rows[-1] + 1
        else:
            # Empty mask (can happen after resizing/cropping): zero box.
            y1 = x1 = y2 = x2 = 0
        boxes[idx] = (y1, x1, y2, x2)
    return boxes.astype(np.int32)
58
+
59
+
60
def compute_iou(box, boxes, box_area, boxes_area):
    """Calculates IoU of the given box with the array of the given boxes.
    box: 1D vector [y1, x1, y2, x2]
    boxes: [boxes_count, (y1, x1, y2, x2)]
    box_area: float. the area of 'box'
    boxes_area: array of length boxes_count.

    Note: the areas are passed in rather than calculated here for
    efficiency. Calculate once in the caller to avoid duplicate work.
    """
    # Intersection rectangle (clamped to zero width/height when disjoint).
    inter_y1 = np.maximum(box[0], boxes[:, 0])
    inter_y2 = np.minimum(box[2], boxes[:, 2])
    inter_x1 = np.maximum(box[1], boxes[:, 1])
    inter_x2 = np.minimum(box[3], boxes[:, 3])
    intersection = (np.maximum(inter_x2 - inter_x1, 0)
                    * np.maximum(inter_y2 - inter_y1, 0))
    union = box_area + boxes_area - intersection
    return intersection / union
79
+
80
+
81
def compute_overlaps(boxes1, boxes2):
    """Computes IoU overlaps between two sets of boxes.
    boxes1, boxes2: [N, (y1, x1, y2, x2)].

    For better performance, pass the largest set first and the smaller second.
    """
    # Precompute areas once; compute_iou expects them as arguments.
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Result matrix [boxes1 count, boxes2 count]; each cell is an IoU.
    # Loop over the (smaller) second set; each call fills one column.
    overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
    for col in range(overlaps.shape[1]):
        overlaps[:, col] = compute_iou(boxes2[col], boxes1, area2[col], area1)
    return overlaps
98
+
99
+
100
def compute_overlaps_masks(masks1, masks2):
    """Computes IoU overlaps between two sets of masks.
    masks1, masks2: [Height, Width, instances]
    """
    n1, n2 = masks1.shape[-1], masks2.shape[-1]
    # If either set of masks is empty return empty result
    if n1 == 0 or n2 == 0:
        return np.zeros((n1, n2))

    # Binarize at 0.5, flatten each mask to a column, and compute areas.
    flat1 = np.reshape(masks1 > .5, (-1, n1)).astype(np.float32)
    flat2 = np.reshape(masks2 > .5, (-1, n2)).astype(np.float32)
    area1 = np.sum(flat1, axis=0)
    area2 = np.sum(flat2, axis=0)

    # Pairwise intersections via one matrix product; union by
    # inclusion-exclusion.
    intersections = np.dot(flat1.T, flat2)
    union = area1[:, None] + area2[None, :] - intersections
    return intersections / union
120
+
121
+
122
def non_max_suppression(boxes, scores, threshold):
    """Performs non-maximum suppression and returns indices of kept boxes.
    boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
    scores: 1-D array of box scores.
    threshold: Float. IoU threshold to use for filtering.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    # Box areas, computed once up front.
    area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Candidate indices ordered by descending score.
    order = scores.argsort()[::-1]

    kept = []
    while len(order) > 0:
        # The highest-scoring remaining box is always kept.
        top = order[0]
        kept.append(top)
        rest = order[1:]
        # IoU of the kept box against all remaining candidates.
        iou = compute_iou(boxes[top], boxes[rest], area[top], area[rest])
        # Indices returned are relative to `rest`, so shift by 1 to index
        # into `order`, then drop the overlapping boxes and the kept one.
        suppressed = np.where(iou > threshold)[0] + 1
        order = np.delete(order, suppressed)
        order = np.delete(order, 0)
    return np.array(kept, dtype=np.int32)
157
+
158
+
159
def apply_box_deltas(boxes, deltas):
    """Applies the given deltas to the given boxes.
    boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
    deltas: [N, (dy, dx, log(dh), log(dw))]
    """
    boxes = boxes.astype(np.float32)
    # Convert corners to center/size form.
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    ctr_y = boxes[:, 0] + 0.5 * heights
    ctr_x = boxes[:, 1] + 0.5 * widths
    # Shift centers by a fraction of the size; scale sizes exponentially.
    ctr_y = ctr_y + deltas[:, 0] * heights
    ctr_x = ctr_x + deltas[:, 1] * widths
    heights = heights * np.exp(deltas[:, 2])
    widths = widths * np.exp(deltas[:, 3])
    # Back to corner form.
    y1 = ctr_y - 0.5 * heights
    x1 = ctr_x - 0.5 * widths
    return np.stack([y1, x1, y1 + heights, x1 + widths], axis=1)
181
+
182
+
183
def box_refinement_graph(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]

    Returns a [N, (dy, dx, dh, dw)] tensor: center shifts normalized by
    box size, and log size ratios. TensorFlow graph-mode counterpart of
    box_refinement() below.

    NOTE(review): tf.log is the TF 1.x name (tf.math.log in TF 2.x) —
    confirm against the project's TensorFlow version.
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    # Center/size form of the proposal boxes.
    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width

    # Center/size form of the ground-truth boxes.
    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width

    # Deltas: normalized center shift and log size ratio.
    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    result = tf.stack([dy, dx, dh, dw], axis=1)
    return result
207
+
208
+
209
def box_refinement(box, gt_box):
    """Compute refinement needed to transform box to gt_box.
    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.
    """
    def to_center_form(b):
        # Convert corner boxes to (center_y, center_x, height, width).
        h = b[:, 2] - b[:, 0]
        w = b[:, 3] - b[:, 1]
        return b[:, 0] + 0.5 * h, b[:, 1] + 0.5 * w, h, w

    cy, cx, h, w = to_center_form(box.astype(np.float32))
    gt_cy, gt_cx, gt_h, gt_w = to_center_form(gt_box.astype(np.float32))

    # Deltas: center shift normalized by box size, log size ratio.
    return np.stack([
        (gt_cy - cy) / h,
        (gt_cx - cx) / w,
        np.log(gt_h / h),
        np.log(gt_w / w),
    ], axis=1)
233
+
234
+
235
+ ############################################################
236
+ # Dataset
237
+ ############################################################
238
+
239
+ class Dataset(object):
240
+ """The base class for dataset classes.
241
+ To use it, create a new class that adds functions specific to the dataset
242
+ you want to use. For example:
243
+
244
+ class CatsAndDogsDataset(Dataset):
245
+ def load_cats_and_dogs(self):
246
+ ...
247
+ def load_mask(self, image_id):
248
+ ...
249
+ def image_reference(self, image_id):
250
+ ...
251
+
252
+ See COCODataset and ShapesDataset as examples.
253
+ """
254
+
255
+ def __init__(self, class_map=None):
256
+ self._image_ids = []
257
+ self.image_info = []
258
+ # Background is always the first class
259
+ self.class_info = [{"source": "", "id": 0, "name": "BG"}]
260
+ self.source_class_ids = {}
261
+
262
+ def add_class(self, source, class_id, class_name):
263
+ assert "." not in source, "Source name cannot contain a dot"
264
+ # Does the class exist already?
265
+ for info in self.class_info:
266
+ if info['source'] == source and info["id"] == class_id:
267
+ # source.class_id combination already available, skip
268
+ return
269
+ # Add the class
270
+ self.class_info.append({
271
+ "source": source,
272
+ "id": class_id,
273
+ "name": class_name,
274
+ })
275
+
276
+ def add_image(self, source, image_id, path, **kwargs):
277
+ image_info = {
278
+ "id": image_id,
279
+ "source": source,
280
+ "path": path,
281
+ }
282
+ image_info.update(kwargs)
283
+ self.image_info.append(image_info)
284
+
285
+ def image_reference(self, image_id):
286
+ """Return a link to the image in its source Website or details about
287
+ the image that help looking it up or debugging it.
288
+
289
+ Override for your dataset, but pass to this function
290
+ if you encounter images not in your dataset.
291
+ """
292
+ return ""
293
+
294
+ def prepare(self, class_map=None):
295
+ """Prepares the Dataset class for use.
296
+
297
+ TODO: class map is not supported yet. When done, it should handle mapping
298
+ classes from different datasets to the same class ID.
299
+ """
300
+
301
+ def clean_name(name):
302
+ """Returns a shorter version of object names for cleaner display."""
303
+ return ",".join(name.split(",")[:1])
304
+
305
+ # Build (or rebuild) everything else from the info dicts.
306
+ self.num_classes = len(self.class_info)
307
+ self.class_ids = np.arange(self.num_classes)
308
+ self.class_names = [clean_name(c["name"]) for c in self.class_info]
309
+ self.num_images = len(self.image_info)
310
+ self._image_ids = np.arange(self.num_images)
311
+
312
+ # Mapping from source class and image IDs to internal IDs
313
+ self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
314
+ for info, id in zip(self.class_info, self.class_ids)}
315
+ self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
316
+ for info, id in zip(self.image_info, self.image_ids)}
317
+
318
+ # Map sources to class_ids they support
319
+ self.sources = list(set([i['source'] for i in self.class_info]))
320
+ self.source_class_ids = {}
321
+ # Loop over datasets
322
+ for source in self.sources:
323
+ self.source_class_ids[source] = []
324
+ # Find classes that belong to this dataset
325
+ for i, info in enumerate(self.class_info):
326
+ # Include BG class in all datasets
327
+ if i == 0 or source == info['source']:
328
+ self.source_class_ids[source].append(i)
329
+
330
+ def map_source_class_id(self, source_class_id):
331
+ """Takes a source class ID and returns the int class ID assigned to it.
332
+
333
+ For example:
334
+ dataset.map_source_class_id("coco.12") -> 23
335
+ """
336
+ return self.class_from_source_map[source_class_id]
337
+
338
+ def get_source_class_id(self, class_id, source):
339
+ """Map an internal class ID to the corresponding class ID in the source dataset."""
340
+ info = self.class_info[class_id]
341
+ assert info['source'] == source
342
+ return info['id']
343
+
344
+ @property
345
+ def image_ids(self):
346
+ return self._image_ids
347
+
348
+ def source_image_link(self, image_id):
349
+ """Returns the path or URL to the image.
350
+ Override this to return a URL to the image if it's available online for easy
351
+ debugging.
352
+ """
353
+ return self.image_info[image_id]["path"]
354
+
355
def load_image(self, image_id):
    """Load the specified image and return it as an [H, W, 3] array.

    Grayscale images are converted to RGB and any alpha channel is
    dropped, so callers always get exactly three channels.
    """
    path = self.image_info[image_id]['path']
    image = skimage.io.imread(path)
    # Grayscale -> RGB for consistency.
    if image.ndim != 3:
        image = skimage.color.gray2rgb(image)
    # Strip an alpha channel if present.
    if image.shape[-1] == 4:
        image = image[..., :3]
    return image
367
+
368
def load_mask(self, image_id):
    """Load instance masks for the given image.

    The default implementation returns empty masks; override it to load
    real instance masks for your dataset as an array of binary masks of
    shape [height, width, instances].

    Returns:
        masks: bool-compatible array [height, width, instance_count],
            one binary mask per instance.
        class_ids: 1-D int32 array of class IDs, one per instance.
    """
    logging.warning("You are using the default load_mask(), maybe you need to define your own one.")
    empty_masks = np.empty([0, 0, 0])
    empty_ids = np.empty([0], np.int32)
    return empty_masks, empty_ids
386
+
387
+
388
def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
    """Resizes an image keeping the aspect ratio unchanged.

    min_dim: if provided, resizes the image such that it's smaller
        dimension == min_dim
    max_dim: if provided, ensures that the image longest side doesn't
        exceed this value.
    min_scale: if provided, ensure that the image is scaled up by at least
        this percent even if min_dim doesn't require it.
    mode: Resizing mode.
        none: No resizing. Return the image unchanged.
        square: Resize and pad with zeros to get a square image
            of size [max_dim, max_dim].
        pad64: Pads width and height with zeros to make them multiples of 64.
            If min_dim or min_scale are provided, it scales the image up
            before padding. max_dim is ignored in this mode.
            The multiple of 64 is needed to ensure smooth scaling of feature
            maps up and down the 6 levels of the FPN pyramid (2**6=64).
        crop: Picks random crops from the image. First, scales the image based
            on min_dim and min_scale, then picks a random crop of
            size min_dim x min_dim. Can be used in training only.
            max_dim is not used in this mode.

    Returns:
        image: the resized image
        window: (y1, x1, y2, x2). If max_dim is provided, padding might
            be inserted in the returned image. If so, this window is the
            coordinates of the image part of the full image (excluding
            the padding). The x2, y2 pixels are not included.
        scale: The scale factor used to resize the image
        padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
    """
    # Keep track of image dtype and return results in the same dtype
    image_dtype = image.dtype
    # Default window (y1, x1, y2, x2) and default scale == 1.
    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1
    padding = [(0, 0), (0, 0), (0, 0)]
    crop = None

    if mode == "none":
        return image, window, scale, padding, crop

    # Scale?
    if min_dim:
        # Scale up but not down
        scale = max(1, min_dim / min(h, w))
    if min_scale and scale < min_scale:
        scale = min_scale

    # Does it exceed max dim?
    if max_dim and mode == "square":
        image_max = max(h, w)
        if round(image_max * scale) > max_dim:
            scale = max_dim / image_max

    # Resize image using bilinear interpolation
    if scale != 1:
        image = resize(image, (round(h * scale), round(w * scale)),
                       preserve_range=True)

    # Need padding or cropping?
    if mode == "square":
        # Pad symmetrically to a max_dim x max_dim square.
        h, w = image.shape[:2]
        top_pad = (max_dim - h) // 2
        bottom_pad = max_dim - h - top_pad
        left_pad = (max_dim - w) // 2
        right_pad = max_dim - w - left_pad
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "pad64":
        h, w = image.shape[:2]
        # Both sides must be divisible by 64
        assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
        # Height
        if h % 64 > 0:
            max_h = h - (h % 64) + 64
            top_pad = (max_h - h) // 2
            bottom_pad = max_h - h - top_pad
        else:
            top_pad = bottom_pad = 0
        # Width
        if w % 64 > 0:
            max_w = w - (w % 64) + 64
            left_pad = (max_w - w) // 2
            right_pad = max_w - w - left_pad
        else:
            left_pad = right_pad = 0
        padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
        image = np.pad(image, padding, mode='constant', constant_values=0)
        window = (top_pad, left_pad, h + top_pad, w + left_pad)
    elif mode == "crop":
        # Pick a random crop of size min_dim x min_dim (training only).
        h, w = image.shape[:2]
        y = random.randint(0, (h - min_dim))
        x = random.randint(0, (w - min_dim))
        crop = (y, x, min_dim, min_dim)
        image = image[y:y + min_dim, x:x + min_dim]
        window = (0, 0, min_dim, min_dim)
    else:
        raise Exception("Mode {} not supported".format(mode))
    # Restore the caller's dtype (resize may have promoted to float).
    return image.astype(image_dtype), window, scale, padding, crop
493
+
494
+
495
def resize_mask(mask, scale, padding, crop=None):
    """Resize a mask with the scale and padding returned by resize_image().

    Using the same scale/padding keeps image and mask aligned.

    mask: [height, width, num_instances] array.
    scale: scaling factor applied to both spatial axes.
    padding: [(top, bottom), (left, right), (0, 0)] zero padding.
    crop: optional (y, x, h, w) window; when given, cropping replaces
        padding.
    """
    # scipy 0.13.0 warns because zoom() computes the output shape with
    # round() instead of int(); the warning is harmless here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        resized = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
    if crop is None:
        return np.pad(resized, padding, mode='constant', constant_values=0)
    y, x, h, w = crop
    return resized[y:y + h, x:x + w]
515
+
516
+
517
def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to reduce memory load.

    Crops each instance mask to its bounding box, then resizes the crop
    down to mini_shape. Mini-masks can be resized back to image scale
    using expand_masks().

    bbox: [num_instances, (y1, x1, y2, x2)].
    mask: [height, width, num_instances] binary masks.
    mini_shape: (height, width) tuple of the output mini masks.

    Raises an Exception if any bounding box has zero area.
    See inspect_data.ipynb notebook for more details.
    """
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        # Pick slice and cast to bool in case load_mask() returned wrong dtype
        m = mask[:, :, i].astype(bool)
        y1, x1, y2, x2 = bbox[i][:4]
        m = m[y1:y2, x1:x2]
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        # Resize with bilinear interpolation
        m = resize(m, mini_shape)
        # FIX: the np.bool alias was removed in NumPy 1.24; use builtin bool.
        mini_mask[:, :, i] = np.around(m).astype(bool)
    return mini_mask
535
+
536
+
537
def expand_mask(bbox, mini_mask, image_shape):
    """Resizes mini masks back to image size. Reverses the change
    of minimize_mask().

    bbox: [num_instances, (y1, x1, y2, x2)].
    mini_mask: [mini_height, mini_width, num_instances].
    image_shape: (height, width, ...) tuple of the target image.

    See inspect_data.ipynb notebook for more details.
    """
    mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        # Resize with bilinear interpolation
        m = resize(m, (h, w))
        # FIX: the np.bool alias was removed in NumPy 1.24; use builtin bool.
        mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)
    return mask
553
+
554
+
555
# TODO: Build and use this function to reduce code duplication
def mold_mask(mask, config):
    """Placeholder; not implemented yet (always returns None)."""
    pass
558
+
559
+
560
def unmold_mask(mask, bbox, image_shape):
    """Converts a mask generated by the neural network to a format similar
    to its original shape.

    mask: [height, width] of type float. A small, typically 28x28 mask.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.
    image_shape: shape of the original image; only [:2] is used.

    Returns a binary mask with the same size as the original image.
    """
    threshold = 0.5
    y1, x1, y2, x2 = bbox
    # Scale the small mask up to the box size, then binarize it.
    # FIX: the np.bool alias was removed in NumPy 1.24; use builtin bool.
    mask = resize(mask, (y2 - y1, x2 - x1))
    mask = np.where(mask >= threshold, 1, 0).astype(bool)

    # Put the mask in the right location.
    full_mask = np.zeros(image_shape[:2], dtype=bool)
    full_mask[y1:y2, x1:x2] = mask
    return full_mask
577
+
578
+
579
+ ############################################################
580
+ # Anchors
581
+ ############################################################
582
+
583
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Generate anchor boxes for one feature map level.

    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
        to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.

    Returns [N, (y1, x1, y2, x2)] anchor corners in image coordinates.
    """
    # Every combination of scale and aspect ratio.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    scale_grid = scale_grid.flatten()
    ratio_grid = ratio_grid.flatten()

    # Anchor heights and widths derived from scale and ratio.
    anchor_h = scale_grid / np.sqrt(ratio_grid)
    anchor_w = scale_grid * np.sqrt(ratio_grid)

    # Anchor center positions in image space.
    ys = np.arange(0, shape[0], anchor_stride) * feature_stride
    xs = np.arange(0, shape[1], anchor_stride) * feature_stride
    xs, ys = np.meshgrid(xs, ys)

    # Every (position, size) combination.
    widths, centers_x = np.meshgrid(anchor_w, xs)
    heights, centers_y = np.meshgrid(anchor_h, ys)

    # Flatten into (y, x) centers and (h, w) sizes.
    centers = np.stack([centers_y, centers_x], axis=2).reshape([-1, 2])
    sizes = np.stack([heights, widths], axis=2).reshape([-1, 2])

    # Convert center/size form to corner coordinates (y1, x1, y2, x2).
    return np.concatenate([centers - 0.5 * sizes,
                           centers + 0.5 * sizes], axis=1)
620
+
621
+
622
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors at different levels of a feature pyramid.

    Each scale is paired with one pyramid level, while every ratio is
    used at all levels.

    Returns:
        anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array,
        sorted in the same order as the given scales: anchors of scales[0]
        come first, then scales[1], and so on.
    """
    per_level = [generate_anchors(scales[i], ratios, feature_shapes[i],
                                  feature_strides[i], anchor_stride)
                 for i in range(len(scales))]
    return np.concatenate(per_level, axis=0)
640
+
641
+
642
+ ############################################################
643
+ # Miscellaneous
644
+ ############################################################
645
+
646
def trim_zeros(x):
    """Remove all-zero rows from a 2D array.

    Tensors are commonly padded with zeros to a fixed size; this strips
    those padding rows.

    x: [rows, columns].
    """
    assert len(x.shape) == 2
    keep = ~np.all(x == 0, axis=1)
    return x[keep]
654
+
655
+
656
def compute_matches(gt_boxes, gt_class_ids, gt_masks,
                    pred_boxes, pred_class_ids, pred_scores, pred_masks,
                    iou_threshold=0.5, score_threshold=0.0):
    """Finds matches between prediction and ground truth instances.

    gt_boxes, pred_boxes: [N, (y1, x1, y2, x2)], possibly zero-padded.
    gt_class_ids, pred_class_ids: [N] integer class IDs.
    gt_masks, pred_masks: [height, width, N] instance masks.
    pred_scores: [N] prediction confidence scores.
    iou_threshold: minimum mask IoU for a prediction to match a GT box.
    score_threshold: GT candidates whose overlap is below this are ignored.

    Returns:
        gt_match: 1-D array. For each GT box it has the index of the matched
            predicted box (-1 when unmatched).
        pred_match: 1-D array. For each predicted box, it has the index of
            the matched ground truth box (-1 when unmatched).
        overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Trim zero padding
    # TODO: cleaner to do zero unpadding upstream
    gt_boxes = trim_zeros(gt_boxes)
    gt_masks = gt_masks[..., :gt_boxes.shape[0]]
    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]
    # Sort predictions by score from high to low
    indices = np.argsort(pred_scores)[::-1]
    pred_boxes = pred_boxes[indices]
    pred_class_ids = pred_class_ids[indices]
    pred_scores = pred_scores[indices]
    pred_masks = pred_masks[..., indices]

    # Compute IoU overlaps [pred_masks, gt_masks]
    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    # Loop through predictions and find matching ground truth boxes.
    # Greedy: higher-scoring predictions get first pick of GT boxes.
    match_count = 0
    pred_match = -1 * np.ones([pred_boxes.shape[0]])
    gt_match = -1 * np.ones([gt_boxes.shape[0]])
    for i in range(len(pred_boxes)):
        # Find best matching ground truth box
        # 1. Sort matches by score
        sorted_ixs = np.argsort(overlaps[i])[::-1]
        # 2. Remove low scores
        low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
        if low_score_idx.size > 0:
            sorted_ixs = sorted_ixs[:low_score_idx[0]]
        # 3. Find the match
        for j in sorted_ixs:
            # If ground truth box is already matched, go to next one
            if gt_match[j] > -1:
                continue
            # If we reach IoU smaller than the threshold, end the loop
            # (overlaps are visited in descending order, so none that
            # follow can qualify either).
            iou = overlaps[i, j]
            if iou < iou_threshold:
                break
            # Do we have a match? Classes must agree, not just boxes.
            if pred_class_ids[i] == gt_class_ids[j]:
                match_count += 1
                gt_match[j] = i
                pred_match[i] = j
                break

    return gt_match, pred_match, overlaps
713
+
714
+
715
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Inputs follow the same conventions as compute_matches().

    Returns:
        mAP: Mean Average Precision
        precisions: List of precisions at different class score thresholds.
        recalls: List of recall values at different class score thresholds.
        overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Get matches and overlaps
    gt_match, pred_match, overlaps = compute_matches(
        gt_boxes, gt_class_ids, gt_masks,
        pred_boxes, pred_class_ids, pred_scores, pred_masks,
        iou_threshold)

    # Compute precision and recall at each prediction box step.
    # pred_match > -1 marks predictions that matched some GT instance.
    precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
    recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)

    # Pad with start and end values to simplify the math
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Ensure precision values decrease but don't increase. This way, the
    # precision value at each recall threshold is the maximum it can be
    # for all following recall thresholds, as specified by the VOC paper.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = np.maximum(precisions[i], precisions[i + 1])

    # Compute mean AP over recall range: sum precision over the recall
    # steps where recall actually changes.
    indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
    mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
                 precisions[indices])

    return mAP, precisions, recalls, overlaps
752
+
753
+
754
def compute_ap_range(gt_box, gt_class_id, gt_mask,
                     pred_box, pred_class_id, pred_score, pred_mask,
                     iou_thresholds=None, verbose=1):
    """Compute AP over a range of IoU thresholds. Default range is 0.5-0.95.

    iou_thresholds: optional iterable of IoU thresholds; defaults to
        0.5 to 0.95 in steps of 0.05.
    verbose: when truthy, print the per-threshold and mean AP.

    Returns the mean AP over the threshold range.
    """
    # FIX: compare against None explicitly. The previous
    # `iou_thresholds or np.arange(...)` raises "truth value of an array
    # is ambiguous" whenever a NumPy array is passed in.
    if iou_thresholds is None:
        # Default is 0.5 to 0.95 with increments of 0.05
        iou_thresholds = np.arange(0.5, 1.0, 0.05)

    # Compute AP over range of IoU thresholds
    AP = []
    for iou_threshold in iou_thresholds:
        ap, precisions, recalls, overlaps =\
            compute_ap(gt_box, gt_class_id, gt_mask,
                       pred_box, pred_class_id, pred_score, pred_mask,
                       iou_threshold=iou_threshold)
        if verbose:
            print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
        AP.append(ap)
    AP = np.array(AP).mean()
    if verbose:
        print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
            iou_thresholds[0], iou_thresholds[-1], AP))
    return AP
776
+
777
+
778
def compute_recall(pred_boxes, gt_boxes, iou):
    """Compute the recall at the given IoU threshold: the fraction of
    GT boxes found by the given prediction boxes.

    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    iou: IoU threshold for counting a GT box as found.

    Returns (recall, positive_ids), where positive_ids are indices of
    predictions whose best overlap meets the threshold.
    """
    # Best-matching GT box (and its IoU) for every prediction.
    overlaps = compute_overlaps(pred_boxes, gt_boxes)
    best_iou = np.max(overlaps, axis=1)
    best_gt = np.argmax(overlaps, axis=1)
    positive_ids = np.where(best_iou >= iou)[0]
    matched_gt_boxes = best_gt[positive_ids]

    # Distinct GT boxes covered, as a fraction of all GT boxes.
    recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
    return recall, positive_ids
794
+
795
+
796
+ # ## Batch Slicing
797
+ # Some custom layers support a batch size of 1 only, and require a lot of work
798
+ # to support batches greater than 1. This function slices an input tensor
799
+ # across the batch dimension and feeds batches of size 1. Effectively,
800
+ # an easy way to support batches > 1 quickly with little code modification.
801
+ # In the long run, it's more efficient to modify the code to support large
802
+ # batches and getting rid of this function. Consider this a temporary solution
803
def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Splits inputs into slices and feeds each slice to a copy of the given
    computation graph and then combines the results. It allows you to run a
    graph on a batch of inputs even if the graph is written to support one
    instance only.

    inputs: list of tensors. All must have the same first dimension length
    graph_fn: A function that returns a TF tensor that's part of a graph.
    batch_size: number of slices to divide the data into.
    names: If provided, assigns names to the resulting tensors.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    # Run the graph once per batch element on the corresponding slices.
    outputs = []
    for i in range(batch_size):
        inputs_slice = [x[i] for x in inputs]
        output_slice = graph_fn(*inputs_slice)
        if not isinstance(output_slice, (tuple, list)):
            output_slice = [output_slice]
        outputs.append(output_slice)
    # Change outputs from a list of slices where each is
    # a list of outputs to a list of outputs and each has
    # a list of slices
    outputs = list(zip(*outputs))

    if names is None:
        names = [None] * len(outputs)

    # Re-stack each output along the batch dimension.
    # NOTE(review): `tf` is expected to be imported at module level
    # elsewhere in this file.
    result = [tf.stack(o, axis=0, name=n)
              for o, n in zip(outputs, names)]
    # Unwrap single-output graphs for convenience.
    if len(result) == 1:
        result = result[0]

    return result
838
+
839
+
840
def download_trained_weights(coco_model_path, verbose=1):
    """Download COCO trained weights from Releases.

    coco_model_path: local path to save the COCO trained weights to.
    verbose: print progress messages when > 0.
    """
    if verbose > 0:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    # COCO_MODEL_URL is a module-level constant defined elsewhere in this file.
    with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if verbose > 0:
        print("... done downloading pretrained model!")
851
+
852
+
853
def norm_boxes(boxes, shape):
    """Converts boxes from pixel coordinates to normalized coordinates.

    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
    shape: [..., (height, width)] in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
    coordinates it's inside the box.

    Returns:
        [N, (y1, x1, y2, x2)] in normalized coordinates
    """
    h, w = shape
    # Shift (y2, x2) inward by one pixel, then scale into [0, 1].
    shift = np.array([0, 0, 1, 1])
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    return ((boxes - shift) / scale).astype(np.float32)
868
+
869
+
870
def denorm_boxes(boxes, shape):
    """Converts boxes from normalized coordinates to pixel coordinates.

    boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
    shape: [..., (height, width)] in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
    coordinates it's inside the box.

    Returns:
        [N, (y1, x1, y2, x2)] in pixel coordinates
    """
    h, w = shape
    # Scale back to pixels, then shift (y2, x2) one pixel outward.
    shift = np.array([0, 0, 1, 1])
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    return np.around(boxes * scale + shift).astype(np.int32)
885
+
886
+
887
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().

    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    # NOTE(review): LooseVersion comes from distutils, which was removed in
    # Python 3.12 -- consider packaging.version when upgrading. TODO confirm.
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range)
visualize.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Display and Visualization Functions.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import random
13
+ import itertools
14
+ import colorsys
15
+
16
+ import numpy as np
17
+ from skimage.measure import find_contours
18
+ import matplotlib.pyplot as plt
19
+ from matplotlib import patches, lines
20
+ from matplotlib.patches import Polygon
21
+ import IPython.display
22
+
23
+ # Root directory of the project
24
+ ROOT_DIR = os.path.abspath("../")
25
+
26
+ # Import Mask RCNN
27
+ sys.path.append(ROOT_DIR) # To find local version of the library
28
+ from mrcnn import utils
29
+
30
+
31
+ ############################################################
32
+ # Visualization
33
+ ############################################################
34
+
35
def display_images(images, titles=None, cols=4, cmap=None, norm=None,
                   interpolation=None):
    """Display the given set of images, optionally with titles.

    images: list or array of image tensors in HWC format.
    titles: optional. A list of titles to display with each image.
    cols: number of images per row
    cmap: Optional. Color map to use. For example, "Blues".
    norm: Optional. A Normalize instance to map values to colors.
    interpolation: Optional. Image interpolation to use for display.
    """
    if titles is None:
        titles = [""] * len(images)
    rows = len(images) // cols + 1
    plt.figure(figsize=(14, 14 * rows // cols))
    for cell, (image, title) in enumerate(zip(images, titles), start=1):
        plt.subplot(rows, cols, cell)
        plt.title(title, fontsize=9)
        plt.axis('off')
        plt.imshow(image.astype(np.uint8), cmap=cmap,
                   norm=norm, interpolation=interpolation)
    plt.show()
57
+
58
+
59
def random_colors(N, bright=True):
    """Generate N visually distinct random RGB colors.

    Colors are spaced evenly in HSV hue and converted to RGB, then
    shuffled so adjacent indices differ.
    """
    brightness = 1.0 if bright else 0.7
    colors = [colorsys.hsv_to_rgb(i / N, 1, brightness) for i in range(N)]
    random.shuffle(colors)
    return colors
70
+
71
+
72
def apply_mask(image, mask, color, alpha=0.5):
    """Blend the given binary mask into the image with the given color.

    Pixels where mask == 1 are mixed with `color` at opacity `alpha`;
    all other pixels are left untouched. Modifies and returns `image`.
    """
    for channel in range(3):
        blended = image[:, :, channel] * (1 - alpha) + alpha * color[channel] * 255
        image[:, :, channel] = np.where(mask == 1,
                                        blended,
                                        image[:, :, channel])
    return image
81
+
82
+
83
def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="",
                      figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True,
                      colors=None, captions=None):
    """Draw detected instances (boxes, masks, labels) on top of an image.

    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    title: (optional) Figure title
    show_mask, show_bbox: To show masks and bounding boxes or not
    figsize: (optional) the size of the image
    colors: (optional) An array or colors to use with each object
    captions: (optional) A list of strings to use as captions for each object
    """
    # Number of instances
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # If no axis is passed, create one and automatically call show()
    auto_show = False
    if not ax:
        _, ax = plt.subplots(1, figsize=figsize)
        auto_show = True

    # Generate random colors
    colors = colors or random_colors(N)

    # Show area outside image boundaries.
    height, width = image.shape[:2]
    ax.set_ylim(height + 10, -10)
    ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        color = colors[i]

        # Bounding box
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        y1, x1, y2, x2 = boxes[i]
        if show_bbox:
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=0.7, linestyle="dashed",
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Label: "<class> <score>" unless explicit captions were given.
        if not captions:
            class_id = class_ids[i]
            score = scores[i] if scores is not None else None
            label = class_names[class_id]
            caption = "{} {:.3f}".format(label, score) if score else label
        else:
            caption = captions[i]
        ax.text(x1, y1 + 8, caption,
                color='w', size=11, backgroundcolor="none")

        # Mask
        mask = masks[:, :, i]
        if show_mask:
            masked_image = apply_mask(masked_image, mask, color)

        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor="none", edgecolor=color)
            ax.add_patch(p)
    ax.imshow(masked_image.astype(np.uint8))
    if auto_show:
        plt.show()
168
+
169
+
170
def display_differences(image,
                        gt_box, gt_class_id, gt_mask,
                        pred_box, pred_class_id, pred_score, pred_mask,
                        class_names, title="", ax=None,
                        show_mask=True, show_box=True,
                        iou_threshold=0.5, score_threshold=0.5):
    """Display ground truth and prediction instances on the same image.

    GT is drawn in green, predictions in red; each prediction's caption
    shows its score and the IoU with its matched GT box.
    """
    # Match predictions to ground truth
    gt_match, pred_match, overlaps = utils.compute_matches(
        gt_box, gt_class_id, gt_mask,
        pred_box, pred_class_id, pred_score, pred_mask,
        iou_threshold=iou_threshold, score_threshold=score_threshold)
    # Ground truth = green. Predictions = red
    colors = [(0, 1, 0, .8)] * len(gt_match)\
        + [(1, 0, 0, 1)] * len(pred_match)
    # Concatenate GT and predictions into one instance list.
    class_ids = np.concatenate([gt_class_id, pred_class_id])
    scores = np.concatenate([np.zeros([len(gt_match)]), pred_score])
    boxes = np.concatenate([gt_box, pred_box])
    masks = np.concatenate([gt_mask, pred_mask], axis=-1)
    # Captions per instance show score/IoU. For unmatched predictions the
    # best available overlap is shown instead.
    captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format(
        pred_score[i],
        (overlaps[i, int(pred_match[i])]
         if pred_match[i] > -1 else overlaps[i].max()))
        for i in range(len(pred_match))]
    # Set title if not provided
    title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU"
    # Display
    display_instances(
        image,
        boxes, masks, class_ids,
        class_names, scores, ax=ax,
        show_bbox=show_box, show_mask=show_mask,
        colors=colors, captions=captions,
        title=title)
206
+
207
+
208
def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
    """Draw up to `limit` ROIs, their refined boxes, and masks.

    rois: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
    refined_rois: [n, 4] the same anchors but refined to fit objects better.
    mask: per-ROI masks, unmolded onto the image via utils.unmold_mask().
    class_ids: [n] class ID per ROI; 0 marks a negative (background) ROI.
    class_names: list of class names of the dataset.
    limit: max number of ROIs to draw; a random subset is picked beyond it.
    """
    masked_image = image.copy()

    # Pick random anchors in case there are too many.
    ids = np.arange(rois.shape[0], dtype=np.int32)
    ids = np.random.choice(
        ids, limit, replace=False) if ids.shape[0] > limit else ids

    fig, ax = plt.subplots(1, figsize=(12, 12))
    if rois.shape[0] > limit:
        plt.title("Showing {} random ROIs out of {}".format(
            len(ids), rois.shape[0]))
    else:
        plt.title("{} ROIs".format(len(ids)))

    # Show area outside image boundaries.
    ax.set_ylim(image.shape[0] + 20, -20)
    ax.set_xlim(-50, image.shape[1] + 20)
    ax.axis('off')

    for i, id in enumerate(ids):
        color = np.random.rand(3)
        class_id = class_ids[id]
        # ROI: dashed box; gray when the ROI is negative (class 0).
        y1, x1, y2, x2 = rois[id]
        p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                              edgecolor=color if class_id else "gray",
                              facecolor='none', linestyle="dashed")
        ax.add_patch(p)
        # Refined ROI: solid box, drawn only for positive ROIs.
        if class_id:
            ry1, rx1, ry2, rx2 = refined_rois[id]
            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)
            # Connect the top-left corners of the anchor and proposal for easy visualization
            ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

            # Label
            label = class_names[class_id]
            ax.text(rx1, ry1 + 8, "{}".format(label),
                    color='w', size=11, backgroundcolor="none")

        # Mask
        m = utils.unmold_mask(mask[id], rois[id]
                              [:4].astype(np.int32), image.shape)
        masked_image = apply_mask(masked_image, m, color)

    ax.imshow(masked_image)

    # Print stats
    print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
    print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
    print("Positive Ratio: {:.2f}".format(
        class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
267
+
268
+
269
+ # TODO: Replace with matplotlib equivalent?
270
+ def draw_box(image, box, color):
271
+ """Draw 3-pixel width bounding boxes on the given image array.
272
+ color: list of 3 int values for RGB.
273
+ """
274
+ y1, x1, y2, x2 = box
275
+ image[y1:y1 + 2, x1:x2] = color
276
+ image[y2:y2 + 2, x1:x2] = color
277
+ image[y1:y2, x1:x1 + 2] = color
278
+ image[y1:y2, x2:x2 + 2] = color
279
+ return image
280
+
281
+
282
def display_top_masks(image, mask, class_ids, class_names, limit=4):
    """Display the given image and the top few class masks."""
    to_display = [image]
    titles = ["H x W={}x{}".format(image.shape[0], image.shape[1])]

    # Rank the classes present in this image by total mask area.
    unique_class_ids = np.unique(class_ids)
    areas = [np.sum(mask[:, :, np.where(class_ids == cid)[0]])
             for cid in unique_class_ids]
    ranked = sorted(zip(unique_class_ids, areas),
                    key=lambda pair: pair[1], reverse=True)
    top_ids = [cid for cid, area in ranked if area > 0]

    # Build one combined-mask image per top class (pad with "-" slots).
    for slot in range(limit):
        class_id = top_ids[slot] if slot < len(top_ids) else -1
        # Pull masks of instances belonging to the same class.
        class_masks = mask[:, :, np.where(class_ids == class_id)[0]]
        # Collapse instances into one image; each instance gets a
        # distinct integer level (1..k) so they render as shades.
        combined = np.sum(
            class_masks * np.arange(1, class_masks.shape[-1] + 1), -1)
        to_display.append(combined)
        titles.append(class_names[class_id] if class_id != -1 else "-")

    display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
303
+
304
+
305
def plot_precision_recall(AP, precisions, recalls):
    """Draw the precision-recall curve.

    AP: Average precision at IoU >= 0.5
    precisions: list of precision values
    recalls: list of recall values
    """
    # One axis, titled with the AP score; limits slightly past 1.0 so the
    # curve's edges are not clipped.
    _, axis = plt.subplots(1)
    axis.set(title="Precision-Recall Curve. AP@50 = {:.3f}".format(AP),
             ylim=(0, 1.1), xlim=(0, 1.1))
    axis.plot(recalls, precisions)
318
+
319
+
320
def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
                  overlaps, class_names, threshold=0.5):
    """Draw a grid showing how ground truth objects are classified.
    gt_class_ids: [N] int. Ground truth class IDs
    pred_class_ids: [N] int. Predicted class IDs
    pred_scores: [N] float. The probability scores of predicted classes
    overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
    class_names: list of all class names in the dataset
    threshold: Float. IoU required for a cell to be annotated as a
        "match"/"wrong" (compared against `overlaps`, not scores)
    """
    # Drop background (class 0) entries; only real objects are labeled.
    gt_class_ids = gt_class_ids[gt_class_ids != 0]
    pred_class_ids = pred_class_ids[pred_class_ids != 0]

    # Heatmap of IoU overlaps: rows = predictions, columns = ground truth.
    plt.figure(figsize=(12, 10))
    plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
    # Row labels: predicted class name plus its confidence score.
    plt.yticks(np.arange(len(pred_class_ids)),
               ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
                for i, id in enumerate(pred_class_ids)])
    plt.xticks(np.arange(len(gt_class_ids)),
               [class_names[int(id)] for id in gt_class_ids], rotation=90)

    # Midpoint of the color range; used to pick a readable text color.
    thresh = overlaps.max() / 2.
    for i, j in itertools.product(range(overlaps.shape[0]),
                                  range(overlaps.shape[1])):
        text = ""
        if overlaps[i, j] > threshold:
            # Same class -> correct match; different class -> wrong label.
            text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
        # White on dark cells, black on light cells, grey on zero overlap.
        color = ("white" if overlaps[i, j] > thresh
                 else "black" if overlaps[i, j] > 0
                 else "grey")
        plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
                 horizontalalignment="center", verticalalignment="center",
                 fontsize=9, color=color)

    plt.tight_layout()
    plt.xlabel("Ground Truth")
    plt.ylabel("Predictions")
357
+
358
+
359
def draw_boxes(image, boxes=None, refined_boxes=None,
               masks=None, captions=None, visibilities=None,
               title="", ax=None):
    """Draw bounding boxes and segmentation masks with different
    customizations.

    boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
    refined_boxes: Like boxes, but draw with solid lines to show
        that they're the result of refining 'boxes'.
    masks: [height, width, N] (indexed as masks[:, :, i] below).
    captions: List of N titles to display on each box
    visibilities: (optional) List of values of 0, 1, or 2. Determine how
        prominent each bounding box should be (0 = muted gray dotted,
        1 = colored dotted, 2 = colored solid). NOTE(review): any other
        value leaves color/style/alpha unset and raises NameError below.
    title: An optional title to show over the image
    ax: (optional) Matplotlib axis to draw on.
    """
    # Number of boxes
    assert boxes is not None or refined_boxes is not None
    N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]

    # Matplotlib Axis
    if not ax:
        _, ax = plt.subplots(1, figsize=(12, 12))

    # Generate random colors
    colors = random_colors(N)

    # Show area outside image boundaries.
    margin = image.shape[0] // 10
    ax.set_ylim(image.shape[0] + margin, -margin)
    ax.set_xlim(-margin, image.shape[1] + margin)
    ax.axis('off')

    ax.set_title(title)

    # uint32 gives headroom while mask colors are blended in; converted
    # back to uint8 for display at the end.
    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        # Box visibility
        visibility = visibilities[i] if visibilities is not None else 1
        if visibility == 0:
            color = "gray"
            style = "dotted"
            alpha = 0.5
        elif visibility == 1:
            color = colors[i]
            style = "dotted"
            alpha = 1
        elif visibility == 2:
            color = colors[i]
            style = "solid"
            alpha = 1

        # Boxes
        if boxes is not None:
            if not np.any(boxes[i]):
                # Skip this instance. Has no bbox. Likely lost in cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=alpha, linestyle=style,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Refined boxes (solid); skipped entirely for muted instances.
        if refined_boxes is not None and visibility > 0:
            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)
            # Connect the top-left corners of the anchor and proposal
            if boxes is not None:
                ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

        # Captions
        if captions is not None:
            caption = captions[i]
            # If there are refined boxes, display captions on them
            # NOTE(review): relies on ry1/rx1 from the branch above; if
            # visibility == 0 they may be undefined or stale — confirm
            # callers never combine captions with visibility 0.
            if refined_boxes is not None:
                y1, x1, y2, x2 = ry1, rx1, ry2, rx2
            ax.text(x1, y1, caption, size=11, verticalalignment='top',
                    color='w', backgroundcolor="none",
                    bbox={'facecolor': color, 'alpha': 0.5,
                          'pad': 2, 'edgecolor': 'none'})

        # Masks
        if masks is not None:
            mask = masks[:, :, i]
            masked_image = apply_mask(masked_image, mask, color)
            # Mask Polygon
            # Pad to ensure proper polygons for masks that touch image edges.
            padded_mask = np.zeros(
                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = mask
            contours = find_contours(padded_mask, 0.5)
            for verts in contours:
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                p = Polygon(verts, facecolor="none", edgecolor=color)
                ax.add_patch(p)
    ax.imshow(masked_image.astype(np.uint8))
459
+
460
+
461
def display_table(table):
    """Display values in a table format.
    table: an iterable of rows, and each row is an iterable of values.
    """
    # Build each <tr> row from left-justified (width 40) <td> cells.
    rendered_rows = []
    for row in table:
        cells = "".join("<td>{:40}</td>".format(str(col)) for col in row)
        rendered_rows.append("<tr>" + cells + "</tr>")
    html = "<table>" + "".join(rendered_rows) + "</table>"
    IPython.display.display(IPython.display.HTML(html))
473
+
474
+
475
def display_weight_stats(model):
    """Scan all weights in the model's trainable layers and display a
    table with stats (shape, min, max, std) for each weight, flagging
    suspicious ones (constant values or very large magnitudes).
    """
    table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
    for layer in model.get_trainable_layers():
        weight_values = layer.get_weights()   # list of Numpy arrays
        weight_tensors = layer.weights        # list of TF tensors
        for idx, w in enumerate(weight_values):
            # Detect problematic layers. Exclude biases of conv layers.
            alert = ""
            is_conv_bias = layer.__class__.__name__ == "Conv2D" and idx == 1
            if w.min() == w.max() and not is_conv_bias:
                alert += "<span style='color:red'>*** dead?</span>"
            if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
                alert += "<span style='color:red'>*** Overflow?</span>"
            # Add row
            table.append([
                weight_tensors[idx].name + alert,
                str(w.shape),
                "{:+9.4f}".format(w.min()),
                "{:+10.4f}".format(w.max()),
                "{:+9.4f}".format(w.std()),
            ])
    display_table(table)