from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from vllm.config import VllmConfig  # ty: ignore[unresolved-import]

from vllm.model_executor.models.config import VerifyAndUpdateConfig  # ty: ignore[unresolved-import]


class EmbedderModelConfig(VerifyAndUpdateConfig):
    """Config hook that adapts an ``EmbedderConfig`` HF config in place.

    The hook copies the checkpoint's own field names (``n_embd``,
    ``n_layer``, ``n_inner``, ``layer_norm_epsilon``, ...) onto the generic
    attribute names (``hidden_size``, ``num_hidden_layers``, ...), builds
    ``rotary_kwargs``, and then re-derives the effective ``max_model_len``.
    """

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Validate and rewrite ``vllm_config.model_config.hf_config``.

        Args:
            vllm_config: The config object whose ``model_config`` (and the
                HF configs hanging off it) is mutated in place.

        Raises:
            AssertionError: If the HF config is not an ``EmbedderConfig``,
                uses an activation other than ``swiglu``/``gelu``, has
                mismatching bias flags, or sets ``rotary_emb_scale_base`` /
                ``rotary_emb_interleaved``.
        """
        from copy import deepcopy

        from vllm.transformers_utils.config import set_default_rope_theta  # ty: ignore[unresolved-import]

        hf_cfg = vllm_config.model_config.hf_config

        assert hf_cfg.__class__.__name__ == "EmbedderConfig"  # nosec B101
        assert hf_cfg.activation_function in ("swiglu", "gelu")  # nosec B101
        hf_cfg.position_embedding_type = getattr(
            hf_cfg, "position_embedding_type", "rope"
        )

        # "swiglu" is expressed as a "silu" hidden activation; "gelu" is
        # passed through unchanged.
        hf_cfg.hidden_act = (
            "silu"
            if hf_cfg.activation_function == "swiglu"
            else hf_cfg.activation_function
        )

        # A single ``bias`` flag is exposed, so all three must agree.
        assert hf_cfg.mlp_fc1_bias == hf_cfg.mlp_fc2_bias == hf_cfg.qkv_proj_bias  # nosec B101
        hf_cfg.bias = hf_cfg.qkv_proj_bias

        assert hf_cfg.rotary_emb_scale_base is None  # nosec B101
        assert not hf_cfg.rotary_emb_interleaved  # nosec B101

        # Mirror the checkpoint's field names onto the generic ones.
        hf_cfg.layer_norm_eps = hf_cfg.layer_norm_epsilon
        hf_cfg.intermediate_size = hf_cfg.n_inner
        hf_cfg.hidden_size = hf_cfg.n_embd
        hf_cfg.num_hidden_layers = hf_cfg.n_layer

        per_head_dim = hf_cfg.hidden_size // hf_cfg.num_attention_heads
        rope_dim = int(per_head_dim * hf_cfg.rotary_emb_fraction)
        trained_len = getattr(hf_cfg, "max_trained_positions", 2048)

        # Must run before ``rope_parameters`` is read below.
        set_default_rope_theta(hf_cfg, default_theta=hf_cfg.rotary_emb_base)

        hf_cfg.rotary_kwargs = dict(
            head_size=per_head_dim,
            rotary_dim=rope_dim,
            max_position=trained_len,
            rope_parameters=hf_cfg.rope_parameters,
        )

        # config.rotary_scaling_factor is ignored on purpose so that, for
        # inputs shorter than max_trained_positions (2048), results stay
        # consistent with SentenceTransformer.
        # Context extension uses vllm-style rope_theta and rope_parameters.
        # See #17785 #18755
        uses_defaults = (
            not vllm_config.model_config.hf_overrides
            and vllm_config.model_config.original_max_model_len is None
        )
        if uses_defaults:
            # Default path: clamp max_model_len to max_trained_positions.
            # For nomic-embed-text-v2-moe the length is set to 512
            # by sentence_bert_config.json.
            clamped_len = min(vllm_config.model_config.max_model_len, trained_len)  # type: ignore[unreachable]
            vllm_config.recalculate_max_model_len(clamped_len)
            return

        # Override path: max_model_len has to be re-verified so it cannot
        # exceed what the position embedding supports.
        model_cfg = vllm_config.model_config
        text_cfg = model_cfg.hf_text_config

        # hf_overrides given as a callable (hf_overrides_fn): keep the current
        # value, which might already be overridden by
        # sentence_bert_config.json.
        requested_len = model_cfg.max_model_len
        if isinstance(model_cfg.hf_overrides, dict):
            # hf_overrides given as kwargs (hf_overrides_kw): an explicit
            # "max_model_len" entry takes precedence.
            requested_len = model_cfg.hf_overrides.get("max_model_len", requested_len)

        # Reset hf_text_config ahead of recalculate_max_model_len.
        if hasattr(text_cfg, "max_model_len"):
            del text_cfg.max_model_len
        text_cfg.max_position_embeddings = trained_len
        text_cfg.rope_parameters = hf_cfg.rotary_kwargs["rope_parameters"]

        # sentence_bert_config.json has higher priority than
        # max_position_embeddings, so drop its max_seq_length from a copy of
        # the encoder config.
        encoder_cfg = deepcopy(model_cfg.encoder_config)
        if encoder_cfg:
            encoder_cfg.pop("max_seq_length", None)
        model_cfg.encoder_config = encoder_cfg

        vllm_config.recalculate_max_model_len(requested_len)