# text2vector/app/models.py
# Last commit: "init project" (5a5e912), shown next to emilbm's avatar.
from pydantic import BaseModel, Field, field_validator, StringConstraints
from typing import Annotated
# Prefixes a text must start with to pass EmbedRequest.check_prefixes.
# The trailing space is part of each prefix. This list is also interpolated
# verbatim into the validator's error message, so changing its type or
# contents changes that message.
PREFIX_ACCEPTED: list[str] = ["query: ", "passage: "]
# A str limited to at most 2000 characters; used as the item type of
# EmbedRequest.texts so the length limit is enforced per text.
ShortText = Annotated[str, StringConstraints(max_length=2000)]
class EmbedRequest(BaseModel):
    """
    Request model for texts to be embedded.
    Each text must start with an accepted prefix and be ≤ 2000 characters.
    The texts need to start with either "query: " or "passage: ".
    """

    # NOTE: pydantic publishes the class docstring above as the model's
    # schema description, so its wording is kept stable on purpose.

    # Required list of texts. The per-item length cap comes from ShortText;
    # the prefix rule is enforced by check_prefixes below.
    texts: list[ShortText] = Field(
        ...,
        description="List of texts to be embedded (≤ 2000 characters each) and must start with 'query: ' or 'passage: '.",
        json_schema_extra={
            "example": [
                "query: what is the capital of France?",
                "passage: Paris is the capital of France.",
            ]
        },
    )

    @field_validator("texts")
    @classmethod
    def check_prefixes(cls, texts: list[str]) -> list[str]:
        """Reject the request unless every text starts with an accepted prefix.

        Args:
            texts: The submitted texts.

        Returns:
            The same list, unchanged, when every item passes.

        Raises:
            ValueError: If at least one text starts with none of the
                prefixes in PREFIX_ACCEPTED.
        """
        # str.startswith takes a tuple of candidates, which replaces an
        # explicit inner loop over the prefixes.
        allowed = tuple(PREFIX_ACCEPTED)
        every_text_ok = all(item.startswith(allowed) for item in texts)
        if not every_text_ok:
            raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
        return texts
class EmbedResponse(BaseModel):
    """Response model containing embeddings."""

    # Required field: there is no default, so it must always be supplied.
    # The metadata is attached through Annotated rather than a `= Field(...)`
    # assignment. The description string is emitted in the schema, so it is
    # kept verbatim.
    embeddings: Annotated[
        list[list[float]],
        Field(
            description="List of embedding vectors corresponding to the input texts. Each embedding is a list of floats with length 1024.",
        ),
    ]