import json
from typing import Any, Dict, List, Literal, Optional, Union

from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator


class QueryRequest(BaseModel):
    """Model with a single required string field, ``query``."""

    # Presumably the caller's free-text question -- confirm against the
    # code that consumes this model.
    query: str


# ---------------------------------------------------------------------------
# Commented-out models below are NOT active code.
# ---------------------------------------------------------------------------

# class QuerySpec(BaseModel):
#     raw_query: str  # query of the user
#     intent: str  # High-level purpose, e.g., "coverage_check" — helps routing and rules.
#     entities: Dict[str, Union[str, List[str]]] = Field(default_factory= dict)  # Extracted items (policy number, dates, amounts) — structured
#     constraints : Dict[str, Any] = Field(default_factory=dict)  # filters like {"jurisdiction":"IN","incident_date":"2024-01-01"}
#     answer_type: Optional[str] = "detailed"
#     followups: Optional[List[str]] = Field(default_factory=list)  # followups for user

#     @model_validator(mode = "before")
#     @classmethod
#     def parse_nested_json(cls, values):  # parsing nested json to load
#         for field in ['entities', 'constraints']:
#             val = values.get(field)
#             if isinstance(val, str):
#                 try:
#                     values[field] = json.loads(val)
#                 except json.JSONDecodeError:
#                     pass
#         return values

# class ClauseHit(BaseModel):
#     doc_id : str  # id of the document
#     page: int  # pdf page id
#     chunk_id: str
#     text: str  # Evidence text used for answer.
#     metadata: Dict[str, Any] = Field(default_factory=dict)  # metadata
#     score: float  # Retrieval similarity score
#     boost: Optional[float] = None
#     combined_score: Optional[float] = None

#     @field_validator("metadata", mode="before")
#     def parse_metadata(cls, v):
#         if isinstance(v, str):
#             try:
#                 return json.loads(v) if v.strip() else {}
#             except json.JSONDecodeError:
#                 return {}
#         return v

# class LogicResult(BaseModel):
#     answer: str
#     decision: str  # "covered"/"not_covered"/"conditional"
#     confidence: float  # 0..1 score for calibration/thresholding.
#     evidence: List[ClauseHit] = Field(default_factory=list)  # List of ClauseHit used to justify the answer.
#     rationale: Optional[str] = None  # Short human-readable reason (audit-friendly).
# Commented-out model below is NOT active code.
# class HackRxRunRequest(BaseModel):
#     documents: HttpUrl = Field(
#         ...,
#         description="URL to the document (PDF, DOCX, or email blob)"
#     )
#     questions: List[str] = Field(
#         ...,
#         description="List of questions to query against the document"
#     )


# The closed set of category labels accepted by DocumentTypeSchema.
_DocumentType = Literal[
    "HR/Employment",
    "Insurance",
    "Legal/Compliance",
    "Financial/Regulatory",
    "Government/Public Policy",
    "Technical/IT Policies",
]


class DocumentTypeSchema(BaseModel):
    """Model with one required field, ``document_types``.

    The field must be exactly one of the six category strings listed in
    ``_DocumentType``; any other value fails validation.
    """

    # Despite the plural name, this holds a single label, not a list.
    document_types: _DocumentType = Field(
        ...,
        description="The category/type of the document",
    )