# Spaces: RobinWu / nerserver (Running)
# File size: 1,866 Bytes

from typing import Literal

from pydantic import BaseModel, Field


class ExtractRequest(BaseModel):
    # Request payload for entity extraction. Only `text` is required; every
    # other field carries a default and may be omitted by the caller.

    # Input text to extract entities from (required, no default).
    text: str

    # Optional labels: an empty list means the server falls back to its
    # built-in bilingual label set (as stated in the description below).
    labels: list[str] = Field(
        description=(
            "Entity type labels. "
            "Leave empty to use built-in bilingual defaults. "
            "Bilingual pairs (e.g. '人名或姓名' + 'full name of a person') "
            "are automatically expanded to improve recall on Chinese / mixed text."
        ),
        default_factory=list,
    )

    # Score cut-off, validated to lie in the closed interval [0.0, 1.0].
    threshold: float = Field(
        0.4,
        description=(
            "Minimum confidence score. Lower values yield more entities; "
            "higher values yield fewer but more precise ones. "
            "Default 0.4 works well for multilingual text."
        ),
        le=1.0,
        ge=0.0,
    )

    # Language hint restricted to the listed literals; defaults to "auto".
    language: Literal["auto", "en", "zh", "ar", "mixed"] = Field(
        "auto",
        description="Hint for language-aware processing. 'auto' detects from the text automatically.",
    )

    # Optional non-negative entity-count floor; None (the default) leaves the
    # value to be derived elsewhere, per the description below.
    min_entities: int | None = Field(
        None,
        description=(
            "Minimum entity count for the primary model to be considered 'sufficient'. "
            "If the primary returns fewer than this, "
            "the fallback model is invoked and its results are MERGED with the primary's (not replaced). "
            "Leave null/omit to auto-calculate from text length and label count."
        ),
        ge=0,
    )


class Entity(BaseModel):
    # One extracted entity. All five fields are required (none has a default).
    # Entity text (presumably the matched surface string from the input — confirm).
    text: str
    # Label assigned to this entity.
    label: str
    # Score for this entity (presumably the model confidence that the request's
    # `threshold` is compared against — confirm with the extraction code).
    score: float
    # Span bounds (presumably character offsets into the request text; whether
    # `end` is exclusive is not visible here — TODO confirm).
    start: int
    end: int


class ExtractResponse(BaseModel):
    # Response payload: the extracted entities plus the labels that were used.
    # List of Entity items; required (no default).
    entities: list[Entity]
    # Echo back which labels were actually used (useful when labels=[] → defaults applied).
    # Defaults to a new empty list when not provided.
    labels_used: list[str] = Field(default_factory=list)