# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from src/transformers/models/modernvbert/modular_modernvbert.py.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the modular. If any change should be done, please apply the change to the
# modular_modernvbert.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# Copyright 2026 Illuin Technology and contributors, and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Literal

from ...configuration_utils import PretrainedConfig
from ..auto import CONFIG_MAPPING, AutoConfig


class ModernVBertConfig(PretrainedConfig):
    r"""
    Configuration class holding the settings of a [`ModernVBert`] model. Instantiating it with the chosen arguments
    defines the model architecture, e.g.
    [ModernVBERT/modernvbert](https://huggingface.co/ModernVBERT/modernvbert).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. See the
    documentation for [`PretrainedConfig`] for more details.

    Args:
        text_config (`dict` or `PretrainedConfig`, *optional*):
            Configuration for the text encoder. When left to `None`, the default `modernbert` configuration is
            created; a `dict` is used as keyword arguments for a `modernbert` configuration. Any other value is
            stored unchanged.
        vision_config (`dict` or `PretrainedConfig`, *optional*):
            Configuration for the vision encoder. When left to `None`, the default `siglip_vision_model`
            configuration is created; a `dict` is used as keyword arguments for a `siglip_vision_model`
            configuration. Any other value is stored unchanged.
        image_token_id (`int | None`, *optional*, defaults to 50407):
            The token id reserved for image tokens inserted into the text stream.
        pixel_shuffle_factor (`int | None`, *optional*, defaults to 4):
            Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
        initializer_range (`float | None`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        initializer_cutoff_factor (`float | None`, *optional*, defaults to 2.0):
            The cutoff factor for the truncated_normal_initializer for initializing all weight matrices.
        classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`):
            The pooling strategy to use for classification tasks. Any other value raises a `ValueError`.
        classifier_dropout (`float | None`, *optional*, defaults to 0.0):
            The dropout probability for the classification head.
        classifier_bias (`bool | None`, *optional*, defaults to `False`):
            Whether to add a bias term to the classification head.

    Example:
    ```python
    >>> from transformers import ModernVBertConfig

    >>> # Initializing configuration
    >>> configuration = ModernVBertConfig()

    >>> # Initializing a model from the configuration (model class is implemented in
    >>> # `modernvbert.modeling_modernvbert`)

    >>> from transformers import ModernVBertModel
    >>> model = ModernVBertModel(configuration)

    >>> # Accessing the model configuration
    >>> cfg = model.config
    ```"""

    model_type = "modernvbert"
    sub_configs: dict[str, Any] = {"text_config": AutoConfig, "vision_config": AutoConfig}

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id: int | None = 50407,
        pixel_shuffle_factor: int | None = 4,
        initializer_range: float | None = 0.02,
        initializer_cutoff_factor: float | None = 2.0,
        classifier_pooling: Literal["cls", "mean"] = "cls",
        classifier_dropout: float | None = 0.0,
        classifier_bias: bool | None = False,
        **kwargs,
    ):
        # Reject an unsupported pooling strategy before any sub-configuration is built.
        if classifier_pooling not in ("cls", "mean"):
            raise ValueError(
                f'Invalid value for `classifier_pooling`, should be either "cls" or "mean", but is {classifier_pooling}.'
            )

        # Text encoder: `None` and `dict` both go through the `modernbert` entry of `CONFIG_MAPPING`
        # (with no arguments, or with the dict as keyword arguments). Other values are stored untouched.
        if text_config is None or isinstance(text_config, dict):
            text_kwargs = {} if text_config is None else text_config
            text_config = CONFIG_MAPPING["modernbert"](**text_kwargs)
        self.text_config = text_config

        # Vision encoder: same resolution rule, using the `siglip_vision_model` entry.
        if vision_config is None or isinstance(vision_config, dict):
            vision_kwargs = {} if vision_config is None else vision_config
            vision_config = CONFIG_MAPPING["siglip_vision_model"](**vision_kwargs)
        self.vision_config = vision_config

        # Plain hyper-parameters are stored as given.
        self.pixel_shuffle_factor = pixel_shuffle_factor
        self.initializer_range = initializer_range
        self.initializer_cutoff_factor = initializer_cutoff_factor
        self.classifier_pooling = classifier_pooling
        self.classifier_dropout = classifier_dropout
        self.classifier_bias = classifier_bias

        # `image_token_id` is forwarded to the parent initializer together with the remaining keyword arguments.
        super().__init__(image_token_id=image_token_id, **kwargs)


__all__ = ["ModernVBertConfig"]