modernvbert_hf / configuration_modernvbert.py

Upload folder using huggingface_hub

7435261 verified about 1 month ago

5.6 kB

	# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
	# This file was automatically generated from src/transformers/models/modernvbert/modular_modernvbert.py.
	# Do NOT edit this file manually as any edits will be overwritten by the generation of
	# the file from the modular. If any change should be done, please apply the change to the
	# modular_modernvbert.py file directly. One of our CI enforces this.
	# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
	# Copyright 2026 Illuin Technology and contributors, and The HuggingFace Inc. team. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from typing import Any, Literal

	from ...configuration_utils import PretrainedConfig
	from ..auto import CONFIG_MAPPING, AutoConfig


	class ModernVBertConfig(PretrainedConfig):
	    r"""
	    This is the configuration class to store the configuration of a [`ModernVBert`] model. It is used to
	    instantiate a ModernVBert model according to the specified arguments and defines the model architecture.
	    e.g. [ModernVBERT/modernvbert](https://huggingface.co/ModernVBERT/modernvbert).

	    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
	    See the documentation for [`PretrainedConfig`] for more details.

	    Args:
	        text_config (`PretrainedConfig` or `dict`, *optional*):
	            Configuration for the text encoder. A `dict` is expanded as keyword arguments into the config
	            class registered under `"modernbert"` in `CONFIG_MAPPING`; `None` builds that class with its
	            default arguments. Any other value is stored as given.
	        vision_config (`PretrainedConfig` or `dict`, *optional*):
	            Configuration for the vision encoder. A `dict` is expanded as keyword arguments into the config
	            class registered under `"siglip_vision_model"` in `CONFIG_MAPPING`; `None` builds that class with
	            its default arguments. Any other value is stored as given.
	        image_token_id (`int | None`, *optional*, defaults to 50407):
	            The token id reserved for image tokens inserted into the text stream.
	        pixel_shuffle_factor (`int | None`, *optional*, defaults to 4):
	            Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
	        initializer_range (`float | None`, *optional*, defaults to 0.02):
	            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
	        initializer_cutoff_factor (`float | None`, *optional*, defaults to 2.0):
	            The cutoff factor for the truncated_normal_initializer for initializing all weight matrices.
	        classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`):
	            The pooling strategy to use for classification tasks.
	        classifier_dropout (`float | None`, *optional*, defaults to 0.0):
	            The dropout probability for the classification head.
	        classifier_bias (`bool | None`, *optional*, defaults to `False`):
	            Whether to add a bias term to the classification head.
	        kwargs:
	            Remaining keyword arguments, forwarded unchanged to `PretrainedConfig.__init__`.

	    Raises:
	        ValueError: If `classifier_pooling` is neither `"cls"` nor `"mean"`.

	    Example:
	    ```python
	    >>> from transformers import ModernVBertConfig

	    >>> # Initializing configuration
	    >>> configuration = ModernVBertConfig()

	    >>> # Initializing a model from the configuration (model class is implemented in
	    >>> # `modernvbert.modeling_modernvbert`)

	    >>> from transformers import ModernVBertModel
	    >>> model = ModernVBertModel(configuration)

	    >>> # Accessing the model configuration
	    >>> cfg = model.config
	    ```"""

	    model_type = "modernvbert"
	    # Both nested configurations are declared with `AutoConfig` as their sub-config class.
	    sub_configs: dict[str, Any] = {"text_config": AutoConfig, "vision_config": AutoConfig}

	    def __init__(
	        self,
	        text_config=None,
	        vision_config=None,
	        image_token_id: int | None = 50407,
	        pixel_shuffle_factor: int | None = 4,
	        initializer_range: float | None = 0.02,
	        initializer_cutoff_factor: float | None = 2.0,
	        classifier_pooling: Literal["cls", "mean"] = "cls",
	        classifier_dropout: float | None = 0.0,
	        classifier_bias: bool | None = False,
	        **kwargs,
	    ):
	        # Reject unsupported pooling strategies before any attribute is set.
	        if classifier_pooling not in ["cls", "mean"]:
	            raise ValueError(
	                f'Invalid value for `classifier_pooling`, should be either "cls" or "mean", but is {classifier_pooling}.'
	            )

	        # Text encoder: default config when omitted, rebuilt from keyword arguments when given as a dict,
	        # otherwise stored exactly as passed in.
	        if text_config is None:
	            text_config = CONFIG_MAPPING["modernbert"]()
	        elif isinstance(text_config, dict):
	            text_config = CONFIG_MAPPING["modernbert"](**text_config)
	        self.text_config = text_config

	        # Vision encoder: same three-way handling, using the "siglip_vision_model" config class.
	        if vision_config is None:
	            vision_config = CONFIG_MAPPING["siglip_vision_model"]()
	        elif isinstance(vision_config, dict):
	            vision_config = CONFIG_MAPPING["siglip_vision_model"](**vision_config)
	        self.vision_config = vision_config

	        self.pixel_shuffle_factor = pixel_shuffle_factor
	        self.initializer_range = initializer_range
	        self.initializer_cutoff_factor = initializer_cutoff_factor
	        self.classifier_pooling = classifier_pooling
	        self.classifier_dropout = classifier_dropout
	        self.classifier_bias = classifier_bias

	        # `image_token_id` is handed to the parent constructor together with the remaining kwargs
	        # rather than being assigned on `self` here.
	        super().__init__(image_token_id=image_token_id, **kwargs)


	# Public API of this module: only the configuration class is exported.
	__all__ = ["ModernVBertConfig"]