from typing import Annotated

from pydantic import BaseModel, Field, StringConstraints, field_validator

# Prefixes a text must begin with to be accepted by EmbedRequest.
PREFIX_ACCEPTED = ["query: ", "passage: "]

# String type capped at 2000 characters via pydantic's StringConstraints.
ShortText = Annotated[str, StringConstraints(max_length=2000)]


class EmbedRequest(BaseModel):
    """
    Request model for texts to be embedded.
    Each text must start with an accepted prefix and be ≤ 2000 characters.
    The texts need to start with either "query: " or "passage: ".
    """

    # Required field (no default); each item is length-limited by ShortText and
    # prefix-checked by check_prefixes below.
    texts: list[ShortText] = Field(
        ...,
        json_schema_extra={
            "example": [
                "query: what is the capital of France?",
                "passage: Paris is the capital of France.",
            ]
        },
        description="List of texts to be embedded (≤ 2000 characters each) and must start with 'query: ' or 'passage: '.",
    )

    @field_validator("texts")
    @classmethod
    def check_prefixes(cls, texts: list[str]) -> list[str]:
        """Ensure every text starts with one of the accepted prefixes.

        Args:
            texts: The value supplied for the ``texts`` field.

        Returns:
            The same list, unchanged, when every entry is acceptable
            (an empty list passes trivially).

        Raises:
            ValueError: If any text does not start with a prefix listed in
                ``PREFIX_ACCEPTED``.
        """
        # str.startswith takes a tuple of candidates, so one call per text
        # replaces an inner loop over the prefixes. The tuple is built here
        # (not at import time) so changes to PREFIX_ACCEPTED are honoured.
        accepted = tuple(PREFIX_ACCEPTED)
        if not all(text.startswith(accepted) for text in texts):
            raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
        return texts


class EmbedResponse(BaseModel):
    """Response model containing embeddings."""

    # Required field: one vector of floats per input text. The description
    # states a length of 1024 per vector; the type itself does not enforce it.
    embeddings: list[list[float]] = Field(
        ...,
        description="List of embedding vectors corresponding to the input texts. Each embedding is a list of floats with length 1024.",
    )