# modernvbert_hf / configuration_modernvbert.py
# paultltc's picture
# Upload folder using huggingface_hub
# 7435261 verified
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from src/transformers/models/modernvbert/modular_modernvbert.py.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the modular. If any change should be done, please apply the change to the
# modular_modernvbert.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 Illuin Technology and contributors, and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Literal
from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING, AutoConfig
class ModernVBertConfig(PretrainedConfig):
    r"""
    Configuration holder for a [`ModernVBert`] model. Instantiating it with the arguments below defines the
    architecture of the model that is built from it,
    e.g. [ModernVBERT/modernvbert](https://huggingface.co/ModernVBERT/modernvbert).

    The class derives from [`PretrainedConfig`]; refer to the [`PretrainedConfig`] documentation for the
    options shared by every configuration object.

    Args:
        text_config (`PretrainedConfig` or `dict`, *optional*):
            Configuration of the text encoder. A `dict` is expanded into a `modernbert` configuration, and a
            default `modernbert` configuration is created when the argument is omitted. Any other value is
            stored unchanged.
        vision_config (`PretrainedConfig` or `dict`, *optional*):
            Configuration of the vision encoder. A `dict` is expanded into a `siglip_vision_model`
            configuration, and a default `siglip_vision_model` configuration is created when the argument is
            omitted. Any other value is stored unchanged.
        image_token_id (`int | None`, *optional*, defaults to 50407):
            Id of the token reserved for image tokens inserted into the text stream. It is forwarded to
            `PretrainedConfig.__init__` as a keyword argument.
        pixel_shuffle_factor (`int | None`, *optional*, defaults to 4):
            Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
        initializer_range (`float | None`, *optional*, defaults to 0.02):
            Standard deviation of the truncated_normal_initializer used to initialize all weight matrices.
        initializer_cutoff_factor (`float | None`, *optional*, defaults to 2.0):
            Cutoff factor of the truncated_normal_initializer used to initialize all weight matrices.
        classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`):
            Pooling strategy used for classification tasks. Any value other than `"cls"` or `"mean"` raises
            a `ValueError`.
        classifier_dropout (`float | None`, *optional*, defaults to 0.0):
            Dropout probability of the classification head.
        classifier_bias (`bool | None`, *optional*, defaults to `False`):
            Whether the classification head has a bias term.

    Example:

    ```python
    >>> from transformers import ModernVBertConfig

    >>> # Initializing configuration
    >>> configuration = ModernVBertConfig()

    >>> # Initializing a model from the configuration (model class is implemented in
    >>> # `modernvbert.modeling_modernvbert`)
    >>> from transformers import ModernVBertModel
    >>> model = ModernVBertModel(configuration)

    >>> # Accessing the model configuration
    >>> cfg = model.config
    ```"""

    model_type = "modernvbert"
    # Both sub-configurations are declared with `AutoConfig` as their class.
    sub_configs: dict[str, Any] = {
        "text_config": AutoConfig,
        "vision_config": AutoConfig,
    }

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id: int | None = 50407,
        pixel_shuffle_factor: int | None = 4,
        initializer_range: float | None = 0.02,
        initializer_cutoff_factor: float | None = 2.0,
        classifier_pooling: Literal["cls", "mean"] = "cls",
        classifier_dropout: float | None = 0.0,
        classifier_bias: bool | None = False,
        **kwargs,
    ):
        # Refuse unsupported pooling strategies before any sub-configuration is built.
        if classifier_pooling not in ("cls", "mean"):
            message = f'Invalid value for `classifier_pooling`, should be either "cls" or "mean", but is {classifier_pooling}.'
            raise ValueError(message)

        # Text encoder: expand a plain dict, fall back to the default ModernBERT config,
        # and keep an already-built configuration object as it is.
        if isinstance(text_config, dict):
            text_config = CONFIG_MAPPING["modernbert"](**text_config)
        elif text_config is None:
            text_config = CONFIG_MAPPING["modernbert"]()
        self.text_config = text_config

        # Vision encoder: same resolution rules, with the SigLIP vision config as the default.
        if isinstance(vision_config, dict):
            vision_config = CONFIG_MAPPING["siglip_vision_model"](**vision_config)
        elif vision_config is None:
            vision_config = CONFIG_MAPPING["siglip_vision_model"]()
        self.vision_config = vision_config

        # Remaining hyper-parameters are stored as given.
        self.pixel_shuffle_factor = pixel_shuffle_factor
        self.initializer_range = initializer_range
        self.initializer_cutoff_factor = initializer_cutoff_factor
        self.classifier_pooling = classifier_pooling
        self.classifier_dropout = classifier_dropout
        self.classifier_bias = classifier_bias

        # `image_token_id` and any extra keyword arguments are handled by the base class.
        super().__init__(image_token_id=image_token_id, **kwargs)
# Public API of this module: only the configuration class is exported.
__all__ = ["ModernVBertConfig"]