| from typing import Literal |
|
|
| from pydantic import BaseModel, Field |
|
|
|
|
class ExtractRequest(BaseModel):
    # Request payload for an entity-extraction call.
    #
    # Only ``text`` is required; every other field carries a default.
    # Documented with comments instead of a docstring so the JSON schema
    # pydantic generates for this model is not altered.

    # Input text to run extraction on (required, no constraints declared).
    text: str

    # Entity labels to look for; a fresh empty list is created per instance.
    labels: list[str] = Field(
        description=(
            "Entity type labels. "
            "Leave empty to use built-in bilingual defaults. "
            "Bilingual pairs (e.g. '人名或姓名' + 'full name of a person') "
            "are automatically expanded to improve recall "
            "on Chinese / mixed text."
        ),
        default_factory=list,
    )

    # Confidence cut-off, validated to lie within the closed range [0.0, 1.0].
    threshold: float = Field(
        description=(
            "Minimum confidence score. Lower values yield more entities; "
            "higher values yield fewer but more precise ones. "
            "Default 0.4 works well for multilingual text."
        ),
        default=0.4,
        le=1.0,
        ge=0.0,
    )

    # Language hint; only the listed literal values pass validation.
    language: Literal["auto", "en", "zh", "ar", "mixed"] = Field(
        description=(
            "Hint for language-aware processing. 'auto' detects "
            "from the text automatically."
        ),
        default="auto",
    )

    # Optional non-negative entity-count floor; None leaves the choice to
    # the service (see the description text below for the stated contract).
    min_entities: int | None = Field(
        description=(
            "Minimum entity count for the primary model to be considered "
            "'sufficient'. If the primary returns fewer than this, the "
            "fallback model is invoked and its results are MERGED with the "
            "primary's (not replaced). Leave null/omit to auto-calculate "
            "from text length and label count."
        ),
        default=None,
        ge=0,
    )
|
|
|
|
class Entity(BaseModel):
    # One extracted entity. All five fields are required and unconstrained.

    # The matched surface string and the label assigned to it.
    text: str
    label: str

    # NOTE(review): presumably the model confidence that the request's
    # threshold is compared against -- confirm with the extraction code.
    score: float

    # NOTE(review): presumably offsets of the match within the request
    # text; unit (characters vs. bytes) and end-inclusivity are not
    # established here -- confirm with the producer.
    start: int
    end: int
|
|
|
|
class ExtractResponse(BaseModel):
    # Response payload: the extracted entities plus the labels reported
    # as used for the run.

    # Required list of extracted entities (may be empty).
    entities: list[Entity]

    # Optional; a new empty list is created per instance when omitted.
    labels_used: list[str] = Field(
        default_factory=list,
    )
|
|