Spaces:

emilbm
/

text2vector

Sleeping

text2vector / app /models.py

init project

5a5e912 3 months ago

1.42 kB

	from pydantic import BaseModel, Field, field_validator, StringConstraints
	from typing import Annotated

	# Prefixes a text must begin with; EmbedRequest.check_prefixes tests each
	# submitted text against this list and echoes it in its error message.
	PREFIX_ACCEPTED = ["query: ", "passage: "]

	# A plain str constrained to at most 2000 characters.
	ShortText = Annotated[str, StringConstraints(max_length=2000)]


	class EmbedRequest(BaseModel):
	    """
	    Request model listing the texts that should be embedded.

	    Every text has to begin with either "query: " or "passage: " and may
	    contain at most 2000 characters.
	    """

	    texts: list[ShortText] = Field(
	        ...,
	        description="List of texts to be embedded (≤ 2000 characters each) and must start with 'query: ' or 'passage: '.",
	        json_schema_extra={
	            "example": [
	                "query: what is the capital of France?",
	                "passage: Paris is the capital of France.",
	            ]
	        },
	    )

	    @field_validator("texts")
	    @classmethod
	    def check_prefixes(cls, texts: list[str]) -> list[str]:
	        """Ensure every text starts with one of the accepted prefixes.

	        Returns the list unchanged when all entries pass.

	        Raises:
	            ValueError: if any entry does not start with an accepted prefix.
	        """
	        # str.startswith accepts a tuple of alternatives, so a single call
	        # per text covers every accepted prefix.
	        accepted = tuple(PREFIX_ACCEPTED)
	        if not all(entry.startswith(accepted) for entry in texts):
	            raise ValueError(f"Each text must start with one of {PREFIX_ACCEPTED}")
	        return texts


	class EmbedResponse(BaseModel):
	    """Response model that carries the embedding vectors."""

	    # Required field (no default): a list of vectors, each a list of floats.
	    embeddings: list[list[float]] = Field(
	        ...,
	        description="List of embedding vectors corresponding to the input texts. Each embedding is a list of floats with length 1024.",
	    )