Instructions to use JalalKhal/test-api with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use JalalKhal/test-api with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("JalalKhal/test-api", trust_remote_code=True)

sentences = [
    "That is a happy person",
    "That is a happy dog",
    "That is a very happy person",
    "Today is a sunny day",
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]
```
- Notebooks
- Google Colab
- Kaggle
| from typing import TYPE_CHECKING | |
| if TYPE_CHECKING: | |
| from vllm.config import VllmConfig # ty: ignore[unresolved-import] | |
| from vllm.model_executor.models.config import VerifyAndUpdateConfig # ty: ignore[unresolved-import] | |
class EmbedderModelConfig(VerifyAndUpdateConfig):
    """Config hook that rewrites an ``EmbedderConfig`` HF config in place."""

    # The hook takes no ``self``/``cls``; declaring it static keeps the call
    # identical whether it is looked up on the class or on an instance.
    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Validate the HF config and fill in the attributes derived from it.

        The HF config is read from ``vllm_config.model_config.hf_config`` and
        mutated in place: ``position_embedding_type``, ``hidden_act``,
        ``bias``, ``layer_norm_eps``, ``intermediate_size``, ``hidden_size``,
        ``num_hidden_layers`` and ``rotary_kwargs`` are set, then
        ``vllm_config.recalculate_max_model_len`` is called with the
        resolved maximum length.

        Args:
            vllm_config: Configuration object whose ``model_config`` carries
                the HF config, the HF overrides and the encoder config.

        Raises:
            AssertionError: If the config class is not named
                ``EmbedderConfig``, the activation is neither ``"swiglu"``
                nor ``"gelu"``, the three bias flags disagree,
                ``rotary_emb_scale_base`` is set, or
                ``rotary_emb_interleaved`` is truthy.
        """
        # vLLM is imported lazily so that merely importing this module does
        # not pull in these dependencies.
        from copy import deepcopy

        from vllm.transformers_utils.config import set_default_rope_theta  # ty: ignore[unresolved-import]

        config = vllm_config.model_config.hf_config

        assert config.__class__.__name__ == "EmbedderConfig"  # nosec B101
        assert config.activation_function in ["swiglu", "gelu"]  # nosec B101
        config.position_embedding_type = getattr(config, "position_embedding_type", "rope")

        # "swiglu" is stored as hidden_act "silu"; "gelu" is passed through.
        if config.activation_function == "swiglu":
            config.hidden_act = "silu"
        else:
            config.hidden_act = config.activation_function

        # A single ``bias`` flag is exposed, so all three source flags must agree.
        assert config.mlp_fc1_bias == config.mlp_fc2_bias == config.qkv_proj_bias  # nosec B101
        config.bias = config.qkv_proj_bias

        assert config.rotary_emb_scale_base is None  # nosec B101
        assert not config.rotary_emb_interleaved  # nosec B101

        # Copy the n_* / layer_norm_epsilon fields to the attribute names set below.
        config.layer_norm_eps = config.layer_norm_epsilon
        config.intermediate_size = config.n_inner
        config.hidden_size = config.n_embd
        config.num_hidden_layers = config.n_layer

        head_dim = config.hidden_size // config.num_attention_heads
        rotary_emb_dim = int(head_dim * config.rotary_emb_fraction)
        max_trained_positions = getattr(config, "max_trained_positions", 2048)

        set_default_rope_theta(config, default_theta=config.rotary_emb_base)

        config.rotary_kwargs = {
            "head_size": head_dim,
            "rotary_dim": rotary_emb_dim,
            "max_position": max_trained_positions,
            "rope_parameters": config.rope_parameters,
        }

        # we ignore config.rotary_scaling_factor so that for datasets shorter
        # than max_trained_positions 2048, the results are consistent
        # with SentenceTransformer.
        # The context extension uses vllm style rope_theta and rope_parameters.
        # See #17785 #18755
        if not vllm_config.model_config.hf_overrides and vllm_config.model_config.original_max_model_len is None:
            # Default
            # Reset max_model_len to max_trained_positions.
            # nomic-embed-text-v2-moe the length is set to 512
            # by sentence_bert_config.json.
            max_model_len = min(vllm_config.model_config.max_model_len, max_trained_positions)  # type: ignore[unreachable]
            vllm_config.recalculate_max_model_len(max_model_len)
        else:
            # We need to re-verify max_model_len to avoid lengths
            # greater than position_embedding.
            model_config = vllm_config.model_config
            hf_text_config = model_config.hf_text_config

            if isinstance(model_config.hf_overrides, dict):
                # hf_overrides_kw
                max_model_len = model_config.hf_overrides.get(
                    "max_model_len", vllm_config.model_config.max_model_len
                )
            else:
                # hf_overrides_fn
                # This might be overridden by sentence_bert_config.json.
                max_model_len = vllm_config.model_config.max_model_len

            # reset hf_text_config for recalculate_max_model_len.
            if hasattr(hf_text_config, "max_model_len"):
                delattr(hf_text_config, "max_model_len")
            hf_text_config.max_position_embeddings = max_trained_positions
            hf_text_config.rope_parameters = config.rotary_kwargs["rope_parameters"]

            # The priority of sentence_bert_config.json is higher
            # than max_position_embeddings
            # Work on a copy; the pop is skipped when the encoder config is
            # missing or empty.
            encoder_config = deepcopy(model_config.encoder_config)
            if encoder_config:
                encoder_config.pop("max_seq_length", None)
            model_config.encoder_config = encoder_config

            vllm_config.recalculate_max_model_len(max_model_len)