from functools import partial
from typing import Literal

from einops import rearrange
from torch import Tensor
from torch.nn import ConvTranspose2d, ConvTranspose3d

from flow_grpo.inflated_lib import (
    MemoryState,
    extend_head,
    inflate_bias,
    inflate_distribution_bias,
    inflate_distribution_weight,
    inflate_weight,
    modify_state_dict,
)
from flow_grpo.conv_gradfix import GradFixConv2d, GradFixConv3d

VERBOSE = False
_inflation_mode_t = Literal["none", "flatten", "partial_flatten", "pad", "tile"]
_direction_t = Literal["", "out", "in"]


class InflatedCausalConv3d(GradFixConv3d):
    """
    Note:
        To match the behavior of the pretrained 2D model when a video clip is
        composed from a single image:
        - by duplicating the image along time: set shape_norm = True
        - by padding with zeros: set shape_norm = False
        Choosing the wrong option leaves a gap at the beginning of training.
        A hedged usage sketch follows this class definition.
    """

    def __init__(
        self, *args, inflation_mode: _inflation_mode_t, shape_norm: bool = True, **kwargs
    ):
        self.shape_norm = shape_norm
        self.inflation_mode = inflation_mode
        self.padding_bank = None
        super().__init__(*args, **kwargs)
        self.temporal_padding = self.padding[0]
        self.padding = (0, *self.padding[1:])  # Remove temporal pad to keep causal.

    def forward(self, input: Tensor, memory_state: MemoryState = MemoryState.DISABLED) -> Tensor:
        # When the kernel is wider than the stride, bank_size is negative and the
        # slice below keeps the last `kernel_size - stride` frames for the next chunk.
        bank_size = self.stride[0] - self.kernel_size[0]
        padding_bank = (
            input[:, :, bank_size:].detach()
            if (bank_size != 0 and memory_state != MemoryState.DISABLED)
            else None
        )
        if (self.padding_bank is not None) and (memory_state == MemoryState.ACTIVE):
            # Streaming: reuse the cached tail of the previous chunk as head padding.
            input = extend_head(input, memory=self.padding_bank)
        else:
            # No memory: put all temporal padding at the head so the conv stays causal.
            input = extend_head(input, times=self.temporal_padding * 2)
        if memory_state != MemoryState.DISABLED and not self.training:
            self.padding_bank = padding_bank
        return super().forward(input)

    def _load_from_state_dict(
        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        if self.inflation_mode == "none":
            super()._load_from_state_dict(
                state_dict,
                prefix,
                local_metadata,
                strict,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
        else:
            # NOTE: need to switch off strict
            super()._load_from_state_dict(
                modify_state_dict(
                    self,
                    state_dict,
                    prefix,
                    verbose=VERBOSE,
                    inflate_weight_fn=partial(inflate_weight, position="tail"),
                    inflate_bias_fn=partial(inflate_bias, position="tail"),
                ),
                prefix,
                local_metadata,
                False,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
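

# --- Hedged usage sketch (not part of the original module) ------------------
# Shows how the padding bank above supports chunked, causal inference: the tail
# frames of each chunk are cached and reused as head padding for the next chunk.
# Channel/kernel sizes are arbitrary illustration values, and the helper is an
# assumption about intended usage rather than the canonical API.
def _demo_chunked_causal_conv(chunks):
    """`chunks` is a list of (B, C, F, H, W) tensors cut from one long video."""
    conv = InflatedCausalConv3d(8, 8, kernel_size=3, padding=1, inflation_mode="none")
    conv.eval()  # the bank is only written outside training (see forward above)
    outputs = []
    for chunk in chunks:
        # The first chunk finds an empty bank and is head-padded normally; every
        # later chunk reuses the cached tail of its predecessor.
        outputs.append(conv(chunk, memory_state=MemoryState.ACTIVE))
    return outputs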


class InflatedDistributionCausalConv3d(GradFixConv3d):
    """
    Note:
        Direction:
        - out: this layer produces the mean/std of a distribution;
        - in: this layer takes tensors sampled from the output of an `out` layer as input.
        A hedged usage sketch follows this class definition.
    """

    def __init__(
        self,
        *args,
        direction: _direction_t,
        inflation_mode: _inflation_mode_t,
        shape_norm: bool = True,
        **kwargs,
    ):
        self.shape_norm = shape_norm
        self.inflation_mode = inflation_mode
        self.direction = direction
        self.padding_bank = None
        super().__init__(*args, **kwargs)
        self.temporal_padding = self.padding[0]
        self.padding = (0, *self.padding[1:])  # Remove temporal pad to keep causal.

    def forward(self, input: Tensor, memory_state: MemoryState = MemoryState.DISABLED) -> Tensor:
        # Same chunked-streaming logic as InflatedCausalConv3d.forward.
        bank_size = self.stride[0] - self.kernel_size[0]
        padding_bank = (
            input[:, :, bank_size:].detach()
            if (bank_size != 0 and memory_state != MemoryState.DISABLED)
            else None
        )
        if (self.padding_bank is not None) and (memory_state == MemoryState.ACTIVE):
            input = extend_head(input, memory=self.padding_bank)
        else:
            input = extend_head(input, times=self.temporal_padding * 2)
        if memory_state != MemoryState.DISABLED and not self.training:
            self.padding_bank = padding_bank
        return super().forward(input)

    def _load_from_state_dict(
        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        if self.inflation_mode == "none":
            super()._load_from_state_dict(
                state_dict,
                prefix,
                local_metadata,
                strict,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
        else:
            super()._load_from_state_dict(
                modify_state_dict(
                    self,
                    state_dict,
                    prefix,
                    verbose=VERBOSE,
                    inflate_weight_fn=partial(
                        inflate_distribution_weight, direction=self.direction, position="tail"
                    ),
                    inflate_bias_fn=partial(
                        inflate_distribution_bias, direction=self.direction, position="tail"
                    ),
                ),
                prefix,
                local_metadata,
                False,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
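

# --- Hedged usage sketch (not part of the original module) ------------------
# Illustrates the `out` / `in` pairing documented above, e.g. around a VAE
# latent: an `out` layer projects features to concatenated mean/logvar, and an
# `in` layer consumes a sample drawn from that distribution. Channel sizes are
# arbitrary, and the plain reparameterisation below is a generic stand-in for
# whatever posterior sampling the surrounding model uses.
def _demo_distribution_pair(features):
    import torch

    to_moments = InflatedDistributionCausalConv3d(
        64, 2 * 8, 1, direction="out", inflation_mode="none"
    )
    from_sample = InflatedDistributionCausalConv3d(
        8, 64, 1, direction="in", inflation_mode="none"
    )
    mean, logvar = to_moments(features).chunk(2, dim=1)
    sample = mean + torch.randn_like(mean) * torch.exp(0.5 * logvar)
    return from_sample(sample)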


class InflatedConvTranspose3d(ConvTranspose3d):
    # Note: unlike the layers above, this one is not causal.
    def __init__(
        self, *args, inflation_mode: _inflation_mode_t, shape_norm: bool = True, **kwargs
    ):
        self.shape_norm = shape_norm
        self.inflation_mode = inflation_mode
        super().__init__(*args, **kwargs)

    def _load_from_state_dict(
        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        if self.inflation_mode == "none":
            super()._load_from_state_dict(
                state_dict,
                prefix,
                local_metadata,
                strict,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )
        else:
            # NOTE: need to switch off strict
            super()._load_from_state_dict(
                modify_state_dict(
                    self,
                    state_dict,
                    prefix,
                    verbose=VERBOSE,
                    inflate_weight_fn=partial(inflate_weight, position="center"),
                    inflate_bias_fn=partial(inflate_bias, position="center"),
                ),
                prefix,
                local_metadata,
                False,
                missing_keys,
                unexpected_keys,
                error_msgs,
            )


class FlattenedConvTranspose3d(ConvTranspose2d):
    # Pseudo-3D transposed conv: frames are folded into the batch axis and a 2D kernel runs per frame.
    def forward(self, input: Tensor, **kwargs) -> Tensor:
        output = rearrange(input, "b c f h w -> (b f) c h w")
        output = super().forward(output)
        output = rearrange(output, "(b f) c h w -> b c f h w", f=input.size(2))
        return output


class FlattenedConv3d(GradFixConv2d):
    # Pseudo-3D conv: frames are folded into the batch axis and a 2D kernel runs per frame.
    def forward(self, input: Tensor, **kwargs) -> Tensor:
        output = rearrange(input, "b c f h w -> (b f) c h w")
        output = super().forward(output)
        output = rearrange(output, "(b f) c h w -> b c f h w", f=input.size(2))
        return output
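

# --- Hedged shape sketch (not part of the original module) ------------------
# The flattened layers treat every frame as an independent 2D sample, so a
# (B, C, F, H, W) video is convolved like a batch of B*F images. The sizes
# below are arbitrary illustration values.
def _demo_flattened_conv_shapes():
    import torch

    video = torch.randn(2, 8, 5, 32, 32)  # (B, C, F, H, W)
    conv = FlattenedConv3d(8, 16, kernel_size=3, padding=1)
    out = conv(video)
    assert out.shape == (2, 16, 5, 32, 32)  # frame count F is untouched
    return out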


def init_causal_conv3d(
    *args,
    inflation_mode: _inflation_mode_t,
    direction: _direction_t = "",
    partial_switch: bool = False,
    **kwargs,
):
    """
    Initialize a causal 3D convolution layer.

    Parameters:
        inflation_mode: One of the modes below; all are compatible with the
            3D-VAE checkpoints we have.
            - none: No inflation is performed; state-dict loading falls back to
              the default behavior.
            - flatten: Produces a "fake" 3D layer that folds the batch and depth
              (frame) axes together and then runs a 2D convolution.
            - partial_flatten:
                - layers with `partial_switch` on: behave like `none`.
                - layers with `partial_switch` off: behave like `flatten`.
            - pad / tile: Refer to the definition of `InflatedCausalConv3d`.
        direction:
            - empty string: ordinary causal convolution layer.
            - out / in: Refer to the definition of `InflatedDistributionCausalConv3d`.
        partial_switch: Only takes effect when `inflation_mode` is `partial_flatten`.

    A hedged usage sketch follows this function.
    """
    stride = kwargs.get("stride", args[3] if len(args) > 3 else None)
    padding = kwargs.get("padding", args[4] if len(args) > 4 else None)
    if "flatten" in inflation_mode:
        if (
            (
                (not stride)
                or isinstance(stride, int)
                or (isinstance(stride, (list, tuple)) and len(stride) < 3)
            )  # the stride config can be used for a 2D conv
            and (
                (not padding)
                or isinstance(padding, int)
                or (isinstance(padding, (list, tuple)) and len(padding) < 3)
            )  # the padding config can be used for a 2D conv
            and (("partial" not in inflation_mode) or (not partial_switch))
            # fully-flattened mode, or `partial_switch` is off
        ):
            return FlattenedConv3d(*args, **kwargs)
        else:
            # Force-override to a plain (non-inflated) causal 3D conv.
            return InflatedCausalConv3d(*args, inflation_mode="none", **kwargs)
    else:
        if direction:
            return InflatedDistributionCausalConv3d(
                *args, direction=direction, inflation_mode=inflation_mode, **kwargs
            )
        else:
            return InflatedCausalConv3d(*args, inflation_mode=inflation_mode, **kwargs)
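

# --- Hedged usage sketch (not part of the original module) ------------------
# How the factory above is typically called; the channel/kernel values are
# arbitrary illustration values.
def _demo_init_causal_conv3d():
    # Real causal 3D conv whose weights are inflated from a 2D checkpoint.
    conv_tile = init_causal_conv3d(64, 64, 3, padding=1, inflation_mode="tile")
    # Pseudo-3D layer: frames are folded into the batch axis and a 2D conv runs.
    conv_flat = init_causal_conv3d(64, 64, 3, padding=1, inflation_mode="flatten")
    # Distribution head, e.g. the VAE mean/logvar projection.
    conv_out = init_causal_conv3d(8, 16, 1, inflation_mode="tile", direction="out")
    return conv_tile, conv_flat, conv_out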


def init_transposed_conv3d(
    *args, inflation_mode: _inflation_mode_t, partial_switch: bool = False, **kwargs
):
    stride = kwargs.get("stride", args[3] if len(args) > 3 else None)
    padding = kwargs.get("padding", args[4] if len(args) > 4 else None)
| if "flatten" in inflation_mode: | |
| if ( | |
| ( | |
| (not stride) | |
| or isinstance(stride, int) | |
| or (isinstance(stride, list or tuple) and len(stride) < 3) | |
| ) | |
| and ( | |
| (not padding) | |
| or isinstance(padding, int) | |
| or (isinstance(padding, list or tuple) and len(padding) < 3) | |
| ) | |
| or (("partial" in inflation_mode) and not partial_switch) | |
| ): | |
| return FlattenedConvTranspose3d(*args, **kwargs) | |
| else: | |
| return InflatedConvTranspose3d( | |
| *args, inflation_mode="none", **kwargs | |
| ) # Force-override | |
    else:
        return InflatedConvTranspose3d(*args, inflation_mode=inflation_mode, **kwargs)
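

# --- Hedged usage sketch (not part of the original module) ------------------
# How the transposed-conv factory is typically called for decoder upsampling;
# the values are arbitrary illustration values.
def _demo_init_transposed_conv3d():
    # Spatial-only 2x upsampling: 2D-compatible stride/padding, so the flatten path applies.
    up_flat = init_transposed_conv3d(32, 32, 4, stride=2, padding=1, inflation_mode="flatten")
    # Spatio-temporal upsampling: needs a real 3D transposed conv with inflated weights.
    up_3d = init_transposed_conv3d(
        32, 32, kernel_size=(3, 4, 4), stride=(1, 2, 2), padding=(1, 1, 1), inflation_mode="tile"
    )
    return up_flat, up_3d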