File size: 1,866 Bytes
372fe0c
 
d470d45
 
 
 
 
372fe0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d470d45
2288fd7
 
 
 
 
 
 
 
 
 
 
d470d45
 
 
 
 
 
 
 
 
 
 
372fe0c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from typing import Literal

from pydantic import BaseModel, Field


# Request body for an entity-extraction call.
#
# Only `text` is required; every other field carries a default. The field
# descriptions below are runtime strings surfaced through the schema, so
# their wording is part of the API contract.
#
# NOTE: documented with comments rather than a class docstring on purpose:
# pydantic copies a model docstring into the generated JSON schema, which
# would change the published schema.
class ExtractRequest(BaseModel):
    # The input text to extract entities from.
    text: str

    # Optional: an empty list means the server falls back to its built-in
    # bilingual label set (that substitution happens outside this model).
    labels: list[str] = Field(
        description=(
            "Entity type labels. "
            "Leave empty to use built-in bilingual defaults. "
            "Bilingual pairs (e.g. '人名或姓名' + 'full name of a person') "
            "are automatically expanded to improve recall on Chinese / mixed text."
        ),
        default_factory=list,
    )

    # Confidence cut-off, validated to lie within the closed range [0.0, 1.0].
    threshold: float = Field(
        ge=0.0,
        le=1.0,
        default=0.4,
        description=(
            "Minimum confidence score. Lower values yield more entities; "
            "higher values yield fewer but more precise ones. "
            "Default 0.4 works well for multilingual text."
        ),
    )

    # Closed set of language hints; any other value fails validation.
    language: Literal["auto", "en", "zh", "ar", "mixed"] = Field(
        description=(
            "Hint for language-aware processing. 'auto' detects from the text automatically."
        ),
        default="auto",
    )

    # None (the default) defers the choice to the service; an explicit value
    # must be non-negative.
    min_entities: int | None = Field(
        ge=0,
        default=None,
        description=(
            "Minimum entity count for the primary model to be considered "
            "'sufficient'. If the primary returns fewer than this, the fallback "
            "model is invoked and its results are MERGED with the primary's "
            "(not replaced). Leave null/omit to auto-calculate from text length "
            "and label count."
        ),
    )


# A single extracted entity, as carried in ExtractResponse.entities.
class Entity(BaseModel):
    # The entity's text.
    # NOTE(review): presumably the matched substring of the request text — confirm.
    text: str
    # Entity type label assigned to this span.
    label: str
    # Score for this entity.
    # NOTE(review): presumably the model confidence that the request's
    # `threshold` is compared against; no range is enforced here — confirm.
    score: float
    # Span boundaries.
    # NOTE(review): presumably character offsets into the request text;
    # whether `end` is inclusive or exclusive is not visible here — confirm.
    start: int
    end: int


# Response body for an entity-extraction call: the extracted entities plus
# the label set that was applied.
class ExtractResponse(BaseModel):
    # Extracted entities; required (no default is declared).
    entities: list[Entity]
    # Echo back which labels were actually used (useful when the request sent
    # labels=[] and defaults were applied). Defaults to a fresh empty list.
    labels_used: list[str] = Field(default_factory=list)