| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | from typing import Any, Literal |
| |
|
| | from ...configuration_utils import PretrainedConfig |
| | from ..auto import CONFIG_MAPPING, AutoConfig |
| |
|
| |
|
class ModernVBertConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`ModernVBert`] model. It is used to
    instantiate a ModernVBert model according to the specified arguments and defines the model architecture.
    e.g. [ModernVBERT/modernvbert](https://huggingface.co/ModernVBERT/modernvbert).

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
    See the documentation for [`PretrainedConfig`] for more details.

    Args:
        text_config (`PretrainedConfig` or `dict`, *optional*):
            Configuration for the text encoder. When `None`, a default `modernbert` configuration is created.
            When a `dict`, it is instantiated with the configuration class registered under its `"model_type"`
            key (`"modernbert"` if the key is absent). A configuration instance is stored unchanged.
        vision_config (`PretrainedConfig` or `dict`, *optional*):
            Configuration for the vision encoder. When `None`, a default `siglip_vision_model` configuration is
            created. When a `dict`, it is instantiated with the configuration class registered under its
            `"model_type"` key (`"siglip_vision_model"` if the key is absent). A configuration instance is stored
            unchanged.
        image_token_id (`int | None`, *optional*, defaults to 50407):
            The token id reserved for image tokens inserted into the text stream.
        pixel_shuffle_factor (`int | None`, *optional*, defaults to 4):
            Scale factor used by any pixel-shuffle / upsampling operations in the vision head.
        initializer_range (`float | None`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        initializer_cutoff_factor (`float | None`, *optional*, defaults to 2.0):
            The cutoff factor for the truncated_normal_initializer for initializing all weight matrices.
        classifier_pooling (`Literal["cls", "mean"]`, *optional*, defaults to `"cls"`):
            The pooling strategy to use for classification tasks.
        classifier_dropout (`float | None`, *optional*, defaults to 0.0):
            The dropout probability for the classification head.
        classifier_bias (`bool | None`, *optional*, defaults to `False`):
            Whether to add a bias term to the classification head.

    Raises:
        ValueError: If `classifier_pooling` is neither `"cls"` nor `"mean"`.

    Example:
    ```python
    >>> from transformers import ModernVBertConfig

    >>> # Initializing configuration
    >>> configuration = ModernVBertConfig()

    >>> # Initializing a model from the configuration (model class is implemented in
    >>> # `modernvbert.modeling_modernvbert`)

    >>> from transformers import ModernVBertModel
    >>> model = ModernVBertModel(configuration)

    >>> # Accessing the model configuration
    >>> cfg = model.config
    ```"""

    model_type = "modernvbert"
    sub_configs: dict[str, Any] = {"text_config": AutoConfig, "vision_config": AutoConfig}

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id: int | None = 50407,
        pixel_shuffle_factor: int | None = 4,
        initializer_range: float | None = 0.02,
        initializer_cutoff_factor: float | None = 2.0,
        classifier_pooling: Literal["cls", "mean"] = "cls",
        classifier_dropout: float | None = 0.0,
        classifier_bias: bool | None = False,
        **kwargs,
    ):
        # Reject unsupported pooling modes before any attribute is set.
        if classifier_pooling not in ("cls", "mean"):
            raise ValueError(
                f'Invalid value for `classifier_pooling`, should be either "cls" or "mean", but is {classifier_pooling}.'
            )

        # Both sub-configs are declared as `AutoConfig` in `sub_configs`, so a dict is resolved
        # through its own `model_type` instead of being forced into the default architecture.
        self.text_config = self._build_sub_config(text_config, "modernbert")
        self.vision_config = self._build_sub_config(vision_config, "siglip_vision_model")

        self.pixel_shuffle_factor = pixel_shuffle_factor
        self.initializer_range = initializer_range
        self.initializer_cutoff_factor = initializer_cutoff_factor
        self.classifier_pooling = classifier_pooling
        self.classifier_dropout = classifier_dropout
        self.classifier_bias = classifier_bias

        # `image_token_id` is forwarded to the base class together with the remaining kwargs.
        super().__init__(image_token_id=image_token_id, **kwargs)

    @staticmethod
    def _build_sub_config(config, default_model_type: str):
        """Normalize a sub-config given as `None`, a `dict`, or a ready configuration instance.

        Args:
            config: `None`, a `dict` of configuration arguments, or an already-built configuration object.
            default_model_type: Key into `CONFIG_MAPPING` used when `config` is `None` or is a `dict`
                without a `"model_type"` entry.

        Returns:
            The configuration object to store on the parent config.
        """
        if config is None:
            return CONFIG_MAPPING[default_model_type]()
        if isinstance(config, dict):
            # Look the type up without mutating the caller's dict.
            sub_model_type = config.get("model_type", default_model_type)
            return CONFIG_MAPPING[sub_model_type](**config)
        # Anything else is passed through untouched, as before.
        return config
| |
|
| |
|
# Names exported by a star-import of this module.
__all__ = [
    "ModernVBertConfig",
]
| |
|