Spaces:
Configuration error
Configuration error
"""
Mask R-CNN
Base Configurations class.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""
| import numpy as np | |
# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.
class Config(object):
    """Base configuration class.

    For custom configurations, create a sub-class that inherits from this
    one and override the class attributes that need to be changed.

    Instantiating the class derives three additional attributes from the
    class-level settings: ``BATCH_SIZE``, ``IMAGE_SHAPE`` and
    ``IMAGE_META_SIZE`` (see ``__init__``).
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. TensorBoard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024

    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0

    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Fraction of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used for R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None: Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True: (don't use). Set layer in training mode even when predicting
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes.

        Derives the following instance attributes from the (possibly
        overridden) class-level settings:

        * ``BATCH_SIZE``: ``IMAGES_PER_GPU * GPU_COUNT``.
        * ``IMAGE_SHAPE``: numpy array ``[side, side, IMAGE_CHANNEL_COUNT]``
          where ``side`` is ``IMAGE_MIN_DIM`` in "crop" resize mode and
          ``IMAGE_MAX_DIM`` in every other mode.
        * ``IMAGE_META_SIZE``: ``1 + 3 + 3 + 4 + 1 + NUM_CLASSES``.
        """
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
                                         self.IMAGE_CHANNEL_COUNT])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
                                         self.IMAGE_CHANNEL_COUNT])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values.

        Prints a "Configurations:" header followed by one line per
        attribute, with the attribute name padded to 30 characters.
        Attributes whose name starts with "__" and attributes whose value
        is callable are skipped. This hides the methods of the class, and
        it also means that a callable supplied as BACKBONE or
        COMPUTE_BACKBONE_SHAPE is not printed.
        """
        print("\nConfigurations:")
        for name in dir(self):
            if name.startswith("__"):
                continue
            # Read the attribute once and reuse it for the check and the print.
            value = getattr(self, name)
            if not callable(value):
                print("{:30} {}".format(name, value))
        print("\n")