File size: 2,867 Bytes
e5b884f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from pydantic import BaseModel, model_validator,field_validator, HttpUrl, Field
from typing import List, Dict, Any, Optional, Union, Literal
import json
# Pydantic model with a single required string field, `query`.
# NOTE(review): presumably the request payload for a query endpoint — confirm against the caller.
class QueryRequest(BaseModel):
    # No default is declared, so a value must be supplied when the model is built.
    query: str

    



# class QuerySpec(BaseModel):
#     raw_query: str # query of the user
#     intent: str # High-level purpose, e.g., "coverage_check" — helps routing and rules.
#     entities: Dict[str, Union[str, List[str]]] = Field(default_factory= dict) # Extracted items (policy number, dates, amounts) — structured
#     constraints : Dict[str, Any] = Field(default_factory=dict) # filters like {"jurisdiction":"IN","incident_date":"2024-01-01"}
#     answer_type: Optional[str] = "detailed" 
#     followups: Optional[List[str]] = Field(default_factory=list) # followups for user

#     @model_validator(mode = "before")
#     @classmethod
#     def parse_nested_json(cls, values): # decode 'entities'/'constraints' when they arrive as JSON strings
#         for field in ['entities', 'constraints']:
#             val = values.get(field)
#             if isinstance(val, str):
#                 try:
#                     values[field] = json.loads(val)
#                 except json.JSONDecodeError:
#                     pass
#         return values

# class ClauseHit(BaseModel):
#     doc_id : str # id of the document
#     page: int # pdf page id 
#     chunk_id: str  
#     text: str # Evidence text used for answer.
#     metadata: Dict[str, Any] = Field(default_factory=dict) # metadata
#     score: float  # Retrieval similarity score
#     boost: Optional[float] = None
#     combined_score: Optional[float] = None

#     @field_validator("metadata", mode="before")
#     def parse_metadata(cls, v):
#         if isinstance(v, str):
#             try:
#                 return json.loads(v) if v.strip() else {}
#             except json.JSONDecodeError:
#                 return {}
#         return v

# class LogicResult(BaseModel):
#     answer: str
#     decision: str # "covered"/"not_covered"/"conditional"
#     confidence: float # 0..1 score for calibration/thresholding.
#     evidence: List[ClauseHit]  = Field(default_factory=list) # List of ClauseHit used to justify the answer.
#     rationale: Optional[str] = None # Short human-readable reason (audit-friendly).
    
# class HackRxRunRequest(BaseModel):
#     documents: HttpUrl = Field(
#         ...,
#         description="URL to the document (PDF, DOCX, or email blob)"
#     )
#     questions: List[str] = Field(
#         ...,
#         description="List of questions to query against the document"
#     )

# Closed set of category labels accepted by DocumentTypeSchema.document_types.
# The order of the members is kept as-is because it is reflected in the generated schema.
_DocumentCategory = Literal[
    "HR/Employment",
    "Insurance",
    "Legal/Compliance",
    "Financial/Regulatory",
    "Government/Public Policy",
    "Technical/IT Policies",
]


# Pydantic model with one required field whose value must be exactly one of
# the labels listed in _DocumentCategory.
class DocumentTypeSchema(BaseModel):
    # `...` marks the field as required (no default value).
    document_types: _DocumentCategory = Field(
        ...,
        description="The category/type of the document",
    )