| from transformers import PretrainedConfig, PreTrainedModel |
| import json |
|
|
class Idefics2ConnectorConfig(PretrainedConfig):
    r"""
    Configuration for the Idefics2 connector (Perceiver Resampler + MLP projector).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vision_hidden_size (`int`, *optional*, defaults to 1152):
            Hidden size of the vision encoder whose features are fed to the connector.
        hidden_size (`int`, *optional*, defaults to 4096):
            Hidden size of the language model the connector projects into.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the perceiver block.
        resampler_n_latents (`int`, *optional*, defaults to 64):
            Number of latent embeddings to resample ("compress") the input sequence to (usually < 128).
        resampler_depth (`int`, *optional*, defaults to 3):
            Depth of the Perceiver Resampler (Transformer w/ cross attention). Should be shallow (<= 3).
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            Epsilon used by the RMS normalization layers.
        resampler_n_heads (`int`, *optional*, defaults to 16):
            Number of heads in each Transformer block (for multi-headed self-attention).
        resampler_head_dim (`int`, *optional*, defaults to 96):
            Dimensionality of each head projection in the Transformer block.
        num_key_value_heads (`int`, *optional*, defaults to 4):
            Number of key-value heads in the perceiver attention block.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        intermediate_size (`int`, *optional*, defaults to 14336):
            Dimension of the MLP projector's intermediate (hidden) layer.
        integrate_sub_images (`bool`, *optional*):
            Whether sub-image (tiled crop) features are merged by the connector.
            NOTE(review): semantics inferred from the name — confirm against the model code.
        num_sub_images (`int`, *optional*):
            Number of sub-images per input image when `integrate_sub_images` is enabled.
            NOTE(review): semantics inferred from the name — confirm against the model code.
    """
    _auto_class = 'AutoConfig'
    model_type = "Idefics2ConnectorConfig"

    # Constructor keywords that may also appear in a serialized JSON config.
    # Used by `from_pretrained` to forward every recognized key instead of a
    # hard-coded subset.
    _INIT_KEYS = (
        "vision_hidden_size",
        "hidden_size",
        "hidden_act",
        "resampler_n_latents",
        "resampler_depth",
        "rms_norm_eps",
        "resampler_n_heads",
        "resampler_head_dim",
        "num_key_value_heads",
        "attention_dropout",
        "intermediate_size",
        "integrate_sub_images",
        "num_sub_images",
    )

    def __init__(
        self,
        vision_hidden_size=1152,
        hidden_size=4096,
        hidden_act="silu",
        resampler_n_latents=64,
        resampler_depth=3,
        rms_norm_eps=1e-05,
        resampler_n_heads=16,
        resampler_head_dim=96,
        num_key_value_heads=4,
        attention_dropout=0.0,
        intermediate_size=14336,
        integrate_sub_images=None,
        num_sub_images=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.vision_hidden_size = vision_hidden_size
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.resampler_n_latents = resampler_n_latents
        self.resampler_depth = resampler_depth
        self.rms_norm_eps = rms_norm_eps
        self.resampler_n_heads = resampler_n_heads
        self.num_key_value_heads = num_key_value_heads
        self.resampler_head_dim = resampler_head_dim
        self.attention_dropout = attention_dropout
        self.intermediate_size = intermediate_size
        self.integrate_sub_images = integrate_sub_images
        self.num_sub_images = num_sub_images

        # Grouped-query attention requires at most as many KV heads as query heads.
        if self.num_key_value_heads > self.resampler_n_heads:
            raise ValueError(
                f"num_key_value_heads={self.num_key_value_heads} must be less than or equal to"
                f" resampler_n_heads={self.resampler_n_heads}"
            )

    @classmethod
    def from_pretrained(cls, config_path, **kwargs) -> "PretrainedConfig":
        """Build a connector config from a JSON file on disk.

        Args:
            config_path (`str` or `os.PathLike`):
                Path to a JSON file containing the serialized configuration.
            kwargs:
                Extra keyword arguments forwarded to the constructor; they
                override values read from the file.

        Returns:
            [`Idefics2ConnectorConfig`]: The loaded configuration.

        Raises:
            OSError: If `config_path` cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)

        # Forward every recognized key present in the file (the previous
        # implementation silently dropped resampler_n_heads, resampler_head_dim,
        # num_key_value_heads and attention_dropout, and raised KeyError on any
        # missing key). Missing keys fall back to the constructor defaults;
        # explicit kwargs take precedence over the file.
        init_kwargs = {k: config_dict[k] for k in cls._INIT_KEYS if k in config_dict}
        init_kwargs.update(kwargs)
        return cls(**init_kwargs)
|
|