Spaces:
Sleeping
Sleeping
| from pydantic import BaseModel, Field, field_validator, StringConstraints | |
| from typing import Annotated | |
# Prefixes a text must start with to be accepted for embedding. Kept as a
# list because its repr is interpolated verbatim into the validation error
# message raised by EmbedRequest.check_prefixes.
PREFIX_ACCEPTED = ["query: ", "passage: "]

# A plain string limited to at most 2000 characters; used as the element type
# of EmbedRequest.texts so the length cap is enforced per text.
ShortText = Annotated[str, StringConstraints(max_length=2000)]
class EmbedRequest(BaseModel):
    """
    Request model for texts to be embedded.
    Each text must start with an accepted prefix and be ≤ 2000 characters.
    The texts need to start with either "query: " or "passage: ".
    """

    # Required list of texts. The per-text length cap comes from ShortText;
    # the prefix requirement is enforced by check_prefixes below.
    texts: list[ShortText] = Field(
        ...,
        json_schema_extra={
            "example": [
                "query: what is the capital of France?",
                "passage: Paris is the capital of France.",
            ]
        },
        description="List of texts to be embedded (≤ 2000 characters each) and must start with 'query: ' or 'passage: '.",
    )

    # The decorators are what register this method with pydantic. Without
    # them it is an ordinary, never-called method and the prefix rule is
    # silently not enforced.
    @field_validator("texts")
    @classmethod
    def check_prefixes(cls, texts: list[str]) -> list[str]:
        """Validate that every text starts with one of PREFIX_ACCEPTED.

        Args:
            texts: The value of the ``texts`` field.

        Returns:
            The same list, unchanged, when every text is acceptable.

        Raises:
            ValueError: If any text does not start with an accepted prefix.
                Pydantic reports this as a validation error on ``texts``.
        """
        # str.startswith accepts a tuple of candidates, so a single call
        # covers all accepted prefixes.
        accepted = tuple(PREFIX_ACCEPTED)
        if not all(text.startswith(accepted) for text in texts):
            raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
        return texts
class EmbedResponse(BaseModel):
    """Response model containing embeddings."""

    # Required field: one vector (a list of floats) per embedded text.
    # The description string is split across lines only for readability;
    # adjacent literals are joined into the same single string at compile time.
    embeddings: list[list[float]] = Field(
        description=(
            "List of embedding vectors corresponding to the input texts. "
            "Each embedding is a list of floats with length 1024."
        ),
        default=...,
    )