Spaces:
Sleeping
Sleeping
File size: 1,421 Bytes
5a5e912 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
from pydantic import BaseModel, Field, field_validator, StringConstraints
from typing import Annotated
# Prefixes a text must begin with to pass the EmbedRequest validator.
# Kept as a list on purpose: the validator interpolates this object into
# its error message, so its repr is part of the user-visible output.
PREFIX_ACCEPTED: list[str] = [
    "query: ",
    "passage: ",
]

# A plain str constrained to at most 2000 characters.
ShortText = Annotated[
    str,
    StringConstraints(max_length=2000),
]
class EmbedRequest(BaseModel):
    """
    Request model for texts to be embedded.
    Each text must start with an accepted prefix and be ≤ 2000 characters.
    The texts need to start with either "query: " or "passage: ".
    """

    # NOTE(review): the docstring wording above is deliberately untouched;
    # Pydantic is expected to publish a model's docstring as its JSON-schema
    # description, so rewording it would change generated API docs — confirm
    # before editing.

    # Required field (Ellipsis default). Per-item length is enforced by the
    # ShortText type; the prefix rule is enforced by check_prefixes below.
    texts: list[ShortText] = Field(
        ...,
        json_schema_extra={
            "example": [
                "query: what is the capital of France?",
                "passage: Paris is the capital of France.",
            ]
        },
        description=(
            "List of texts to be embedded (≤ 2000 characters each) "
            "and must start with 'query: ' or 'passage: '."
        ),
    )

    @field_validator("texts")
    @classmethod
    def check_prefixes(cls, texts: list[str]) -> list[str]:
        """Ensure every entry begins with one of PREFIX_ACCEPTED.

        Returns the list unchanged when all entries qualify (an empty list
        passes trivially).

        Raises:
            ValueError: if at least one entry lacks an accepted prefix.
        """
        # str.startswith accepts a tuple of candidates, so one call per
        # entry covers every allowed prefix.
        allowed = tuple(PREFIX_ACCEPTED)
        if all(entry.startswith(allowed) for entry in texts):
            return texts
        raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
class EmbedResponse(BaseModel):
    """Response model containing embeddings."""

    # NOTE(review): docstring left verbatim; Pydantic is expected to expose
    # it as the model's schema description — confirm before rewording.

    # Required field: no default is supplied. The "length 1024" statement
    # lives only in the description text; nothing in this model enforces the
    # vector length.
    embeddings: Annotated[
        list[list[float]],
        Field(
            description=(
                "List of embedding vectors corresponding to the input texts. "
                "Each embedding is a list of floats with length 1024."
            ),
        ),
    ]
|