Spaces:
Sleeping
Sleeping
File size: 2,867 Bytes
e5b884f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | from pydantic import BaseModel, model_validator,field_validator, HttpUrl, Field
from typing import List, Dict, Any, Optional, Union, Literal
import json
class QueryRequest(BaseModel):
    """Pydantic model carrying a single required string field, ``query``.

    NOTE(review): presumably the payload schema for a user query — confirm
    against the code that consumes this model.
    """

    # Required; no default is declared, so validation fails when it is absent.
    query: str
# class QuerySpec(BaseModel):
# raw_query: str # query of the user
# intent: str # High-level purpose, e.g., "coverage_check" — helps routing and rules.
# entities: Dict[str, Union[str, List[str]]] = Field(default_factory= dict) # Extracted items (policy number, dates, amounts) — structured
# constraints : Dict[str, Any] = Field(default_factory=dict) # filters like {"jurisdiction":"IN","incident_date":"2024-01-01"}
# answer_type: Optional[str] = "detailed"
# followups: Optional[List[str]] = Field(default_factory=list) # followups for user
# @model_validator(mode = "before")
# @classmethod
# def parse_nested_json(cls, values): # parsing nested json to load
# for field in ['entities', 'constraints']:
# val = values.get(field)
# if isinstance(val, str):
# try:
# values[field] = json.loads(val)
# except json.JSONDecodeError:
# pass
# return values
# class ClauseHit(BaseModel):
# doc_id : str # id of the document
# page: int # pdf page id
# chunk_id: str
# text: str # Evidence text used for answer.
# metadata: Dict[str, Any] = Field(default_factory=dict) # metadata
# score: float # Retrieval similarity score
# boost: Optional[float] = None
# combined_score: Optional[float] = None
# @field_validator("metadata", mode="before")
# def parse_metadata(cls, v):
# if isinstance(v, str):
# try:
# return json.loads(v) if v.strip() else {}
# except json.JSONDecodeError:
# return {}
# return v
# class LogicResult(BaseModel):
# answer: str
# decision: str # "covered"/"not_covered"/"conditional"
# confidence: float # 0..1 score for calibration/thresholding.
# evidence: List[ClauseHit] = Field(default_factory=list) # List of ClauseHit used to justify the answer.
# rationale: Optional[str] = None # Short human-readable reason (audit-friendly).
# class HackRxRunRequest(BaseModel):
# documents: HttpUrl = Field(
# ...,
# description="URL to the document (PDF, DOCX, or email blob)"
# )
# questions: List[str] = Field(
# ...,
# description="List of questions to query against the document"
# )
class DocumentTypeSchema(BaseModel):
    """Pydantic model holding one required document-category label.

    The ``document_types`` field accepts exactly one of the six string
    literals listed below; any other value is rejected during validation.
    """

    # Despite the plural name, the field holds a single category value.
    document_types: Literal[
        "HR/Employment",
        "Insurance",
        "Legal/Compliance",
        "Financial/Regulatory",
        "Government/Public Policy",
        "Technical/IT Policies",
    ] = Field(
        ...,  # Ellipsis marks the field as required (no default value).
        description="The category/type of the document",
    )
|