R2SE_model / mmdet3d_plugin /uniad /modules /custom_base_transformer_layer.py

Upload folder using huggingface_hub

663494c verified 2 months ago

10.6 kB

	# ---------------------------------------------
	# Copyright (c) OpenMMLab. All rights reserved.
	# ---------------------------------------------
	# Modified by Zhiqi Li
	# ---------------------------------------------

	import copy
	import warnings

	import torch

	from mmcv import ConfigDict
	from mmcv.cnn import build_norm_layer
	from mmcv.runner.base_module import BaseModule, ModuleList

	from mmcv.cnn.bricks.registry import TRANSFORMER_LAYER
	# from mmcv.cnn.bricks.transformer import build_feedforward_network, build_attention
	from mmdet3d_plugin.uniad.custom_modules.transformer import (
	build_feedforward_network, build_attention
	)


	@TRANSFORMER_LAYER.register_module()
	class MyCustomBaseTransformerLayer(BaseModule):
	"""Base `TransformerLayer` for vision transformer.
	It can be built from `mmcv.ConfigDict` and support more flexible
	customization, for example, using any number of `FFN or LN ` and
	use different kinds of `attention` by specifying a list of `ConfigDict`
	named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
	when you specifying `norm` as the first element of `operation_order`.
	More details about the `prenorm`: `On Layer Normalization in the
	Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .
	Args:
	attn_cfgs (list[`mmcv.ConfigDict`] \| obj:`mmcv.ConfigDict` \| None )):
	Configs for `self_attention` or `cross_attention` modules,
	The order of the configs in the list should be consistent with
	corresponding attentions in operation_order.
	If it is a dict, all of the attention modules in operation_order
	will be built with this config. Default: None.
	ffn_cfgs (list[`mmcv.ConfigDict`] \| obj:`mmcv.ConfigDict` \| None )):
	Configs for FFN, The order of the configs in the list should be
	consistent with corresponding ffn in operation_order.
	If it is a dict, all of the attention modules in operation_order
	will be built with this config.
	operation_order (tuple[str]): The execution order of operation
	in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
	Support `prenorm` when you specifying first element as `norm`.
	Default：None.
	norm_cfg (dict): Config dict for normalization layer.
	Default: dict(type='LN').
	init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
	Default: None.
	batch_first (bool): Key, Query and Value are shape
	of (batch, n, embed_dim)
	or (n, batch, embed_dim). Default to False.
	"""

	def __init__(
	self,
	attn_cfgs=None,
	ffn_cfgs=dict(
	type="FFN",
	embed_dims=256,
	feedforward_channels=1024,
	num_fcs=2,
	ffn_drop=0.0,
	act_cfg=dict(type="ReLU", inplace=True),
	),
	operation_order=None,
	norm_cfg=dict(type="LN"),
	init_cfg=None,
	batch_first=True,
	**kwargs,
	):

	deprecated_args = dict(
	feedforward_channels="feedforward_channels",
	ffn_dropout="ffn_drop",
	ffn_num_fcs="num_fcs",
	)
	for ori_name, new_name in deprecated_args.items():
	if ori_name in kwargs:
	warnings.warn(
	f"The arguments `{ori_name}` in BaseTransformerLayer "
	f"has been deprecated, now you should set `{new_name}` "
	f"and other FFN related arguments "
	f"to a dict named `ffn_cfgs`. "
	)
	ffn_cfgs[new_name] = kwargs[ori_name]

	super(MyCustomBaseTransformerLayer, self).__init__(init_cfg)

	self.batch_first = batch_first

	assert set(operation_order) & set(
	["self_attn", "norm", "ffn", "cross_attn"]
	) == set(operation_order), (
	f"The operation_order of"
	f" {self.__class__.__name__} should "
	f"contains all four operation type "
	f"{['self_attn', 'norm', 'ffn', 'cross_attn']}"
	)

	num_attn = operation_order.count("self_attn") + operation_order.count(
	"cross_attn"
	)
	if isinstance(attn_cfgs, dict):
	attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
	else:
	assert num_attn == len(attn_cfgs), (
	f"The length "
	f"of attn_cfg {num_attn} is "
	f"not consistent with the number of attention"
	f"in operation_order {operation_order}."
	)

	self.num_attn = num_attn
	self.operation_order = operation_order
	self.norm_cfg = norm_cfg
	self.pre_norm = operation_order[0] == "norm"
	self.attentions = ModuleList()

	index = 0
	for operation_name in operation_order:
	if operation_name in ["self_attn", "cross_attn"]:
	if "batch_first" in attn_cfgs[index]:
	assert self.batch_first == attn_cfgs[index]["batch_first"]
	else:
	attn_cfgs[index]["batch_first"] = self.batch_first
	attention = build_attention(attn_cfgs[index])
	# Some custom attentions used as `self_attn`
	# or `cross_attn` can have different behavior.
	attention.operation_name = operation_name
	self.attentions.append(attention)
	index += 1

	self.embed_dims = self.attentions[0].embed_dims

	self.ffns = ModuleList()
	num_ffns = operation_order.count("ffn")
	if isinstance(ffn_cfgs, dict):
	ffn_cfgs = ConfigDict(ffn_cfgs)
	if isinstance(ffn_cfgs, dict):
	ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
	assert len(ffn_cfgs) == num_ffns
	for ffn_index in range(num_ffns):
	if "embed_dims" not in ffn_cfgs[ffn_index]:
	ffn_cfgs["embed_dims"] = self.embed_dims
	else:
	assert ffn_cfgs[ffn_index]["embed_dims"] == self.embed_dims

	self.ffns.append(build_feedforward_network(ffn_cfgs[ffn_index]))

	self.norms = ModuleList()
	num_norms = operation_order.count("norm")
	for _ in range(num_norms):
	self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])

	def forward(
	self,
	query,
	key=None,
	value=None,
	query_pos=None,
	key_pos=None,
	attn_masks=None,
	query_key_padding_mask=None,
	key_padding_mask=None,
	**kwargs,
	):
	"""Forward function for `TransformerDecoderLayer`.
	**kwargs contains some specific arguments of attentions.
	Args:
	query (Tensor): The input query with shape
	[num_queries, bs, embed_dims] if
	self.batch_first is False, else
	[bs, num_queries embed_dims].
	key (Tensor): The key tensor with shape [num_keys, bs,
	embed_dims] if self.batch_first is False, else
	[bs, num_keys, embed_dims] .
	value (Tensor): The value tensor with same shape as `key`.
	query_pos (Tensor): The positional encoding for `query`.
	Default: None.
	key_pos (Tensor): The positional encoding for `key`.
	Default: None.
	attn_masks (List[Tensor] \| None): 2D Tensor used in
	calculation of corresponding attention. The length of
	it should equal to the number of `attention` in
	`operation_order`. Default: None.
	query_key_padding_mask (Tensor): ByteTensor for `query`, with
	shape [bs, num_queries]. Only used in `self_attn` layer.
	Defaults to None.
	key_padding_mask (Tensor): ByteTensor for `query`, with
	shape [bs, num_keys]. Default: None.
	Returns:
	Tensor: forwarded results with shape [num_queries, bs, embed_dims].
	"""

	norm_index = 0
	attn_index = 0
	ffn_index = 0
	identity = query
	if attn_masks is None:
	attn_masks = [None for _ in range(self.num_attn)]
	elif isinstance(attn_masks, torch.Tensor):
	attn_masks = [copy.deepcopy(attn_masks) for _ in range(self.num_attn)]
	warnings.warn(
	f"Use same attn_mask in all attentions in "
	f"{self.__class__.__name__} "
	)
	else:
	assert len(attn_masks) == self.num_attn, (
	f"The length of "
	f"attn_masks {len(attn_masks)} must be equal "
	f"to the number of attention in "
	f"operation_order {self.num_attn}"
	)

	for layer in self.operation_order:
	if layer == "self_attn":
	temp_key = temp_value = query
	query = self.attentions[attn_index](
	query,
	temp_key,
	temp_value,
	identity if self.pre_norm else None,
	query_pos=query_pos,
	key_pos=query_pos,
	attn_mask=attn_masks[attn_index],
	key_padding_mask=query_key_padding_mask,
	**kwargs,
	)
	attn_index += 1
	identity = query

	elif layer == "norm":
	query = self.norms[norm_index](query)
	norm_index += 1

	elif layer == "cross_attn":
	query = self.attentions[attn_index](
	query,
	key,
	value,
	identity if self.pre_norm else None,
	query_pos=query_pos,
	key_pos=key_pos,
	attn_mask=attn_masks[attn_index],
	key_padding_mask=key_padding_mask,
	**kwargs,
	)
	attn_index += 1
	identity = query

	elif layer == "ffn":
	query = self.ffns[ffn_index](query, identity if self.pre_norm else None)
	ffn_index += 1

	return query