File size: 1,866 Bytes
372fe0c d470d45 372fe0c d470d45 2288fd7 d470d45 372fe0c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | from typing import Literal
from pydantic import BaseModel, Field
class ExtractRequest(BaseModel):
    """Input schema for an entity-extraction request.

    Only ``text`` is required; every other field has a default. The
    meaning of each tuning option is given in its ``Field`` description.
    """

    # Raw input text; the only field without a default.
    text: str

    # Optional: an empty list makes the server fall back to its built-in
    # bilingual label set.
    labels: list[str] = Field(
        default_factory=list,
        description=(
            "Entity type labels. "
            "Leave empty to use built-in bilingual defaults. "
            "Bilingual pairs (e.g. '人名或姓名' + 'full name of a person') "
            "are automatically expanded to improve recall on Chinese / mixed text."
        ),
    )

    # Score cut-off, validated to lie in the closed interval [0.0, 1.0].
    threshold: float = Field(
        default=0.4,
        description=(
            "Minimum confidence score. "
            "Lower values yield more entities; "
            "higher values yield fewer but more precise ones. "
            "Default 0.4 works well for multilingual text."
        ),
        ge=0.0,
        le=1.0,
    )

    # Closed set of accepted language hints; any other value fails validation.
    language: Literal["auto", "en", "zh", "ar", "mixed"] = Field(
        default="auto",
        description=(
            "Hint for language-aware processing. 'auto' detects "
            "from the text automatically."
        ),
    )

    # None (the default) leaves the choice to the server; an explicit value
    # must be a non-negative integer.
    min_entities: int | None = Field(
        default=None,
        description=(
            "Minimum entity count for the primary model to be considered 'sufficient'. "
            "If the primary returns fewer than this, the fallback model is invoked "
            "and its results are MERGED with the primary's (not replaced). "
            "Leave null/omit to auto-calculate from text length and label count."
        ),
        ge=0,
    )
class Entity(BaseModel):
    """A single extracted entity: its text, label, score and span bounds.

    All five fields are required and carry no range validation.
    """

    text: str
    label: str
    # NOTE(review): presumably the model confidence that is compared with
    # ExtractRequest.threshold -- confirm against the extraction code.
    score: float
    # NOTE(review): start/end look like offsets into the request text;
    # confirm the unit (characters vs. bytes) and whether `end` is exclusive.
    start: int
    end: int
class ExtractResponse(BaseModel):
    """Result schema: the extracted entities plus the labels that were applied."""

    entities: list[Entity]

    # Reports the labels that were actually used, so a caller that sent
    # labels=[] can see which defaults were substituted. Falls back to an
    # empty list when not supplied.
    labels_used: list[str] = Field(default_factory=list)
|