Spaces:

karolmajek
/

Axial-DeepLab-SWideRNet

Runtime error

App Files Files Community

Axial-DeepLab-SWideRNet / model /encoder /axial_resnet_instances.py

karolmajek

from https://huggingface.co/spaces/akhaliq/deeplab2

d1843be over 4 years ago

raw

history blame contribute delete

17.8 kB

	# coding=utf-8
	# Copyright 2021 The Deeplab2 Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Contains Axial-ResNet model instances for Axial-DeepLab and MaX-DeepLab.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	MaX-DeepLab: "End-to-End Panoptic Segmentation with Mask Transformers",
	CVPR 2021. https://arxiv.org/abs/2012.00759
	Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
	"""

	import abc
	import collections.abc
	import copy

	from absl import logging
	import tensorflow as tf

	from deeplab2.model.encoder import axial_resnet


	def _get_default_config():
	"""Gets the default config for Axial-ResNets."""
	# The default config dictionary for an Axial-ResNet is the MaX-DeepLab-S
	# architecture for panoptic segmentation. This default config dictionary also
	# exactly matches the default arguments of the functions.
	default_config = {
	'num_blocks': [3, 4, 6, 3],
	'backbone_layer_multiplier': 1.0,
	'width_multiplier': 1.0,
	'stem_width_multiplier': 1.0,
	'output_stride': 16,
	'classification_mode': False,
	'backbone_type': 'resnet_beta',
	'use_axial_beyond_stride': 16,
	'backbone_use_transformer_beyond_stride': 32,
	'extra_decoder_use_transformer_beyond_stride': 32,
	'backbone_decoder_num_stacks': 0,
	'backbone_decoder_blocks_per_stage': 1,
	'extra_decoder_num_stacks': 0,
	'extra_decoder_blocks_per_stage': 1,
	'max_num_mask_slots': 128,
	'num_mask_slots': 128,
	'memory_channels': 256,
	'base_transformer_expansion': 1.0,
	'global_feed_forward_network_channels': 256,
	'high_resolution_output_stride': 4,
	'activation': 'relu',
	'block_group_config': {
	'attention_bottleneck_expansion': 2,
	'drop_path_keep_prob': 0.8,
	'drop_path_beyond_stride': 16,
	'drop_path_schedule': 'constant',
	'positional_encoding_type': None,
	'use_global_beyond_stride': 0,
	'use_sac_beyond_stride': 0,
	'use_squeeze_and_excite': False,
	'conv_use_recompute_grad': False,
	'axial_use_recompute_grad': True,
	'recompute_within_stride': 0,
	'transformer_use_recompute_grad': False,
	'axial_layer_config': {
	'query_shape': (129, 129),
	'key_expansion': 1,
	'value_expansion': 2,
	'memory_flange': (32, 32),
	'double_global_attention': False,
	'num_heads': 8,
	'use_query_rpe_similarity': True,
	'use_key_rpe_similarity': True,
	'use_content_similarity': True,
	'retrieve_value_rpe': True,
	'retrieve_value_content': True,
	'initialization_std_for_query_key_rpe': 1.0,
	'initialization_std_for_value_rpe': 1.0,
	'self_attention_activation': 'softmax',
	},
	'dual_path_transformer_layer_config': {
	'num_heads': 8,
	'bottleneck_expansion': 2,
	'key_expansion': 1,
	'value_expansion': 2,
	'feed_forward_network_channels': 2048,
	'use_memory_self_attention': True,
	'use_pixel2memory_feedback_attention': True,
	'transformer_activation': 'softmax',
	},
	},
	'bn_layer': tf.keras.layers.BatchNormalization,
	'conv_kernel_weight_decay': 0.0,
	}
	return default_config


	def override(config_dict, override_dict):
	"""Recursively overrides a config dict with another."""
	output_dict = copy.deepcopy(config_dict)
	for key, value in override_dict.items():
	if isinstance(value, collections.abc.Mapping):
	output_dict[key] = override(config_dict.get(key, {}), value)
	else:
	output_dict[key] = value
	return output_dict


	class AxialResNetInstance(axial_resnet.AxialResNet):
	"""A base Axial-ResNet model."""

	@classmethod
	@abc.abstractmethod
	def _get_config(cls):
	pass

	def __init__(self, name, **kwargs):
	"""Builds an Axial-ResNet model."""
	# Get the config of the current model.
	current_config = self._get_config()

	# Override the default config with the current config. This line can be
	# omitted because the default config equals the default arguments of the
	# functions that build the model. But we make all the configs explicit here.
	current_config = override(_get_default_config(), current_config)

	# Finally, override the current model config with keyword arguments. In this
	# way, we still respect arguments passed as keyword arguments, such as
	# classification_mode, output_stride, etc.
	current_config = override(current_config, kwargs)
	logging.info('Axial-ResNet final config: %s', current_config)
	super(AxialResNetInstance, self).__init__(name, **current_config)


	class MaXDeepLabS(AxialResNetInstance):
	"""MaX-DeepLab-S for panoptic segmentation.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	MaX-DeepLab: "End-to-End Panoptic Segmentation with Mask Transformers",
	CVPR 2021. https://arxiv.org/abs/2012.00759
	Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	# Return an empty dictionary as the default values are all set for
	# MaX-DeepLab-S.
	return {}


	class MaXDeepLabL(AxialResNetInstance):
	"""MaX-DeepLab-L for panoptic segmentation.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	MaX-DeepLab: "End-to-End Panoptic Segmentation with Mask Transformers",
	CVPR 2021. https://arxiv.org/abs/2012.00759
	Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	return {
	'num_blocks': [3, 6, 3, 3],
	'backbone_type': 'wider_resnet',
	'backbone_use_transformer_beyond_stride': 16,
	'extra_decoder_use_transformer_beyond_stride': 16,
	'backbone_decoder_num_stacks': 1,
	'extra_decoder_num_stacks': 1,
	'extra_decoder_blocks_per_stage': 3,
	'memory_channels': 512,
	'base_transformer_expansion': 2.0,
	'global_feed_forward_network_channels': 512,
	'block_group_config': {
	'attention_bottleneck_expansion': 4,
	'drop_path_beyond_stride': 4,
	'axial_layer_config': {
	'key_expansion': 2,
	'value_expansion': 4,
	},
	},
	}


	class MaXDeepLabSBackbone(MaXDeepLabS):
	"""MaX-DeepLab-S backbone for image classification pretraining.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	MaX-DeepLab: "End-to-End Panoptic Segmentation with Mask Transformers",
	CVPR 2021. https://arxiv.org/abs/2012.00759
	Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(MaXDeepLabSBackbone, cls)._get_config()
	# Override the config of MaXDeepLabS.
	override_config = {
	'classification_mode': True,
	# The transformer blocks are not ImageNet pretrained. They are randomly
	# initialized and trained from scratch for panoptic segmentation.
	'backbone_use_transformer_beyond_stride': 0,
	}
	return override(base_config, override_config)


	class MaXDeepLabLBackbone(MaXDeepLabL):
	"""MaX-DeepLab-L backbone for image classification pretraining.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	MaX-DeepLab: "End-to-End Panoptic Segmentation with Mask Transformers",
	CVPR 2021. https://arxiv.org/abs/2012.00759
	Huiyu Wang, Yukun Zhu, Hartwig Adam, Alan Yuille, Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(MaXDeepLabLBackbone, cls)._get_config()
	# Override the config of MaXDeepLabL.
	override_config = {
	'classification_mode': True,
	# The transformer blocks are not ImageNet pretrained. They are randomly
	# initialized and trained from scratch for panoptic segmentation.
	'backbone_use_transformer_beyond_stride': 0,
	}
	return override(base_config, override_config)


	class ResNet50(AxialResNetInstance):
	"""A ResNet-50 instance.

	Note that the implementation is different from the original ResNet-50 in:
	(1) We apply strided convolutions in the first 3x3 convolution of the first
	residual block of a stage.
	(2) We replace the strided max pooling layer in the stem by applying strided
	convolution in the immediate next residual block.
	"""

	@classmethod
	def _get_config(cls):
	return {
	'classification_mode': True,
	'backbone_type': 'resnet',
	'use_axial_beyond_stride': 0,
	'backbone_use_transformer_beyond_stride': 0,
	'block_group_config': {
	'drop_path_keep_prob': 1.0,
	},
	}


	class ResNet50Beta(ResNet50):
	"""A ResNet-50 but with inception stem.

	Note that the implementation is different from the original ResNet-50 in:
	(1) We apply strided convolutions in the first 3x3 convolution of the first
	residual block of a stage.
	(2) We replace the strided max pooling layer in the stem by applying strided
	convolution in the immediate next residual block.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(ResNet50Beta, cls)._get_config()
	# Override the config of ResNet50.
	override_config = {
	'backbone_type': 'resnet_beta',
	}
	return override(base_config, override_config)


	class AxialResNetL(ResNet50):
	"""Axial-ResNet-L for image classification only.

	Axial-ResNet-L is a ResNet50 with use_axial_beyond_stride = 2.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(AxialResNetL, cls)._get_config()
	# Override the config of ResNet50.
	override_config = {
	'use_axial_beyond_stride': 2,
	}
	return override(base_config, override_config)


	class AxialResNetS(ResNet50):
	"""Axial-ResNet-S for image classification only.

	Axial-ResNet-S is a ResNet50 with use_axial_beyond_stride = 2 and
	width_multiplier = 0.5.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(AxialResNetS, cls)._get_config()
	# Override the config of ResNet50.
	override_config = {
	'width_multiplier': 0.5,
	'use_axial_beyond_stride': 2,
	}
	return override(base_config, override_config)


	class AxialDeepLabL(ResNet50Beta):
	"""Axial-DeepLab-L for panoptic segmentation.

	Axial-DeepLab-L is a ResNet50Beta with use_axial_beyond_stride = 2.
	Axial-DeepLab-L is also equivalent to Axial-ResNet-L with an inception stem.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(AxialDeepLabL, cls)._get_config()
	override_config = {
	'use_axial_beyond_stride': 2,
	}
	return override(base_config, override_config)


	class AxialDeepLabS(ResNet50Beta):
	"""Axial-DeepLab-S for panoptic segmentation.

	Axial-DeepLab-S is a ResNet50Beta with use_axial_beyond_stride = 2 and
	width_multiplier = 0.5.
	Axial-DeepLab-S is also equivalent to Axial-ResNet-S with an inception stem.

	Reference:
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(AxialDeepLabS, cls)._get_config()
	override_config = {
	'width_multiplier': 0.5,
	'use_axial_beyond_stride': 2,
	}
	return override(base_config, override_config)


	class SWideRNet(AxialResNetInstance):
	"""A SWideRNet instance.

	Note that the implementation is different from the original SWideRNet in:
	(1) We apply strided convolutions in the first residual block of a stage,
	instead of the last residual block.
	(2) We replace the strided max pooling layer in the stem by applying strided
	convolution in the immediate next residual block.
	(3) We (optionally) use squeeze and excitation in all five stages, instead
	of the last four stages only.

	Reference:
	Scaling Wide Residual Networks for Panoptic Segmentation,
	https://arxiv.org/abs/2011.11675
	Liang-Chieh Chen, Huiyu Wang, Siyuan Qiao.
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	return {
	'num_blocks': [3, 6, 3, 3],
	'classification_mode': True,
	'backbone_type': 'wider_resnet',
	'use_axial_beyond_stride': 0,
	'backbone_use_transformer_beyond_stride': 0,
	'block_group_config': {
	'drop_path_beyond_stride': 4,
	'conv_use_recompute_grad': True,
	},
	}


	class AxialSWideRNet(SWideRNet):
	"""SWideRNet with axial attention blocks in the last two stages.

	Note that the implementation is different from the original SWideRNet in:
	(1) We apply strided convolutions in the first residual block of a stage,
	instead of the last residual block.
	(2) We replace the strided max pooling layer in the stem by applying strided
	convolution in the immediate next residual block.
	(3) We (optionally) use squeeze and excitation in all five stages, instead
	of the last four stages only.

	Reference:
	Scaling Wide Residual Networks for Panoptic Segmentation,
	https://arxiv.org/abs/2011.11675
	Liang-Chieh Chen, Huiyu Wang, Siyuan Qiao.
	Axial-Deeplab: Stand-Alone Axial-Attention for Panoptic Segmentation,
	ECCV 2020 Spotlight. https://arxiv.org/abs/2003.07853
	Huiyu Wang, Yukun Zhu, Bradley Green, Hartwig Adam, Alan Yuille,
	Liang-Chieh Chen.
	"""

	@classmethod
	def _get_config(cls):
	base_config = super(AxialSWideRNet, cls)._get_config()
	override_config = {
	'use_axial_beyond_stride': 16,
	'block_group_config': {
	'attention_bottleneck_expansion': 4,
	'axial_layer_config': {
	'key_expansion': 2,
	'value_expansion': 4,
	},
	},
	}
	return override(base_config, override_config)


	def get_model(name, **kwargs):
	"""Gets the model instance given the model name."""
	name_lower = name.lower()
	if name_lower == 'max_deeplab_s':
	return MaXDeepLabS(name_lower, **kwargs)
	elif name_lower == 'max_deeplab_l':
	return MaXDeepLabL(name_lower, **kwargs)
	elif name_lower == 'max_deeplab_s_backbone':
	return MaXDeepLabSBackbone(name_lower, **kwargs)
	elif name_lower == 'max_deeplab_l_backbone':
	return MaXDeepLabLBackbone(name_lower, **kwargs)
	elif name_lower == 'resnet50':
	return ResNet50(name_lower, **kwargs)
	elif name_lower == 'resnet50_beta':
	return ResNet50Beta(name_lower, **kwargs)
	elif name_lower == 'swidernet' or name_lower == 'wide_resnet41':
	return SWideRNet(name_lower, **kwargs)
	elif name_lower == 'axial_swidernet':
	return AxialSWideRNet(name_lower, **kwargs)
	elif name_lower == 'axial_resnet_s':
	return AxialResNetS(name_lower, **kwargs)
	elif name_lower == 'axial_resnet_l':
	return AxialResNetL(name_lower, **kwargs)
	elif name_lower == 'axial_deeplab_s':
	return AxialDeepLabS(name_lower, **kwargs)
	elif name_lower == 'axial_deeplab_l':
	return AxialDeepLabL(name_lower, **kwargs)
	else:
	raise ValueError(name_lower + ' is not supported.')