File size: 4,644 Bytes
cbb1b1a
 
 
 
dc0c45b
 
 
 
cbb1b1a
dc0c45b
 
 
cbb1b1a
 
 
 
 
dc0c45b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbb1b1a
 
 
 
dc0c45b
 
 
 
cbb1b1a
 
 
 
4070852
 
 
 
 
 
 
 
 
 
cbb1b1a
 
 
dc0c45b
4070852
 
 
 
 
cbb1b1a
 
dc0c45b
 
 
 
cbb1b1a
 
 
dc0c45b
 
cbb1b1a
dc0c45b
 
 
cbb1b1a
 
dc0c45b
 
 
 
 
 
 
 
 
 
cbb1b1a
 
 
dc0c45b
 
 
cbb1b1a
4070852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc0c45b
 
 
cbb1b1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc0c45b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Pydantic request / response models for the FastAPI backend.

Endpoints and their schemas:
    GET  /api/health                       → HealthResponse
    POST /api/session/create               → CreateSessionResponse
    POST /api/session/{id}/message         → MessageRequest → MessageResponse
    POST /api/session/{id}/upload          → (multipart UploadFile) → UploadResponse
    POST /api/session/{id}/validate-entities → ValidateEntitiesRequest → MessageResponse
    GET  /api/session/{id}/history         → HistoryResponse
    POST /api/classify                     → ClassifyRequest → ClassifyResponse (debug)
    POST /api/extract                      → ExtractRequest  → ExtractResponse  (debug)
"""

from pydantic import BaseModel


# ---------------------------------------------------------------------------
# Shared sub-model
# ---------------------------------------------------------------------------

class EntityOut(BaseModel):
    """Entity span in the same schema as EvidenceNER's Entity dataclass."""
    # Element type of UploadResponse.entities and ExtractResponse.entities.
    text: str          # text of the entity span
    label: str         # entity type label
    start: int         # NOTE(review): presumably a character offset into the source text — confirm
    end: int           # NOTE(review): inclusive vs. exclusive end is not shown here — confirm against Entity
    confidence: float  # plain float: this schema does not constrain the range


# ---------------------------------------------------------------------------
# Health
# ---------------------------------------------------------------------------

class HealthResponse(BaseModel):
    # Response body for GET /api/health (per the endpoint list in the module docstring).
    status: str          # "ok" | "degraded"
    # component_name → "ok" | "keyword_fallback" | "rule_fallback" | "error: …"
    # Declared as a bare dict, so this schema itself does not constrain key or value types.
    components: dict


# ---------------------------------------------------------------------------
# Session lifecycle
# ---------------------------------------------------------------------------

class CreateSessionResponse(BaseModel):
    # Response body for POST /api/session/create (per the module docstring).
    # NOTE(review): presumably the value used as {id} in the /api/session/{id}/... routes — confirm.
    session_id: str


# ---------------------------------------------------------------------------
# Message
# ---------------------------------------------------------------------------

class MessageRequest(BaseModel):
    # Request body for POST /api/session/{id}/message (per the module docstring).
    text: str   # required; no length or emptiness constraint is declared here


class RedactionSpanOut(BaseModel):
    """One redacted PII span, returned to the user's own browser for the
    side-by-side reveal. Never forwarded to any third-party API."""
    # Element type of MessageResponse.redactions.
    entity_type: str   # kind of PII this span was classified as
    original: str      # NOTE(review): presumably the unredacted value — sensitive; confirm it is never logged
    placeholder: str   # NOTE(review): presumably the token substituted for `original` in the redacted text
    start: int         # NOTE(review): which text these offsets index (original vs. redacted) is not shown here
    end: int           # NOTE(review): inclusive vs. exclusive end is not shown here — confirm


class MessageResponse(BaseModel):
    # Response body for POST /api/session/{id}/message and
    # POST /api/session/{id}/validate-entities (per the module docstring).
    reply: str                   # reply text returned to the caller
    pii_redacted: bool           # NOTE(review): presumably True when PII was redacted from the message — confirm
    pii_types_found: list[str]   # required (no default), unlike the reveal fields below
    # Side-by-side reveal payload (Feature 1). Defaults keep older callers /
    # the validate-entities and escalation handlers (no redaction) valid.
    original_text: str = ""
    redacted_text: str = ""
    # NOTE(review): Pydantic copies mutable field defaults per instance, so this
    # literal [] should not be shared between responses — confirm for the pinned version.
    redactions: list[RedactionSpanOut] = []


# ---------------------------------------------------------------------------
# Upload
# ---------------------------------------------------------------------------

class UploadResponse(BaseModel):
    # Response body for POST /api/session/{id}/upload (per the module docstring).
    filename: str              # NOTE(review): presumably the uploaded file's name as received — confirm
    raw_text: str              # NOTE(review): presumably the text extracted from the upload — confirm
    entities: list[EntityOut]  # NOTE(review): presumably spans found in raw_text — confirm offsets refer to it


# ---------------------------------------------------------------------------
# HITL validate-entities
# ---------------------------------------------------------------------------

class ValidateEntitiesRequest(BaseModel):
    # Request body for POST /api/session/{id}/validate-entities (per the module docstring).
    # Declared as a bare dict, so this schema itself does not constrain key or value types.
    entities: dict   # {entity_type: corrected_value}, e.g. {"ORG": "HDFC Bank"}


# ---------------------------------------------------------------------------
# History
# ---------------------------------------------------------------------------

class HistoryMessage(BaseModel):
    # One conversation turn; element type of HistoryResponse.history.
    # `role` is a plain str, so the two documented values are not enforced by this schema.
    role: str        # "user" | "assistant"
    content: str     # text of the turn


class HistoryResponse(BaseModel):
    # Response body for GET /api/session/{id}/history (per the module docstring).
    session_id: str
    history: list[HistoryMessage]   # NOTE(review): presumably in chronological order — confirm against the handler


# ---------------------------------------------------------------------------
# Privacy audit trail
# ---------------------------------------------------------------------------

class AuditEntry(BaseModel):
    """One outbound/processing event in the privacy audit trail."""
    # Element type of AuditResponse.entries. The string-typed fields below are
    # plain str, so the documented value sets/formats are not enforced by this schema.
    timestamp: str          # ISO-8601 UTC
    event: str              # "outbound_to_anthropic" | "document_local"
    description: str        # human-readable summary
    transmitted_text: str   # exactly what was sent externally (already redacted)
    pii_types_found: list[str]
    pii_count: int          # NOTE(review): presumably the number of PII spans found for this event — confirm
    leak_check: str         # "passed" | "failed" | "n/a"


class AuditResponse(BaseModel):
    # Audit trail for one session.
    # NOTE(review): the module docstring's endpoint list has no route for this
    # response — confirm which endpoint returns it and add it there.
    session_id: str
    entries: list[AuditEntry]


# ---------------------------------------------------------------------------
# Debug endpoints
# ---------------------------------------------------------------------------

class ClassifyRequest(BaseModel):
    # Request body for the debug endpoint POST /api/classify (per the module docstring).
    text: str   # text to classify


class ClassifyResponse(BaseModel):
    # Response body for the debug endpoint POST /api/classify (per the module docstring).
    domain: str         # NOTE(review): presumably the predicted class label — confirm
    confidence: float   # NOTE(review): presumably the score for `domain`; range not constrained here
    all_probs: dict     # NOTE(review): presumably label → probability; bare dict, so contents are not validated


class ExtractRequest(BaseModel):
    # Request body for the debug endpoint POST /api/extract (per the module docstring).
    text: str   # text to run entity extraction on


class ExtractResponse(BaseModel):
    # Response body for the debug endpoint POST /api/extract (per the module docstring).
    entities: list[EntityOut]   # same span schema as UploadResponse.entities