Spaces:
Sleeping
Sleeping
| from pydantic import BaseModel, model_validator,field_validator, HttpUrl, Field | |
| from typing import List, Dict, Any, Optional, Union, Literal | |
| import json | |
# Request payload model with a single required string field, `query`.
# NOTE: documented with comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its JSON schema `description`.
class QueryRequest(BaseModel):
    query: str  # required; no default value is declared
| # class QuerySpec(BaseModel): | |
| # raw_query: str # query of the user | |
| # intent: str # High-level purpose, e.g., "coverage_check" — helps routing and rules. | |
| # entities: Dict[str, Union[str, List[str]]] = Field(default_factory= dict) # Extracted items (policy number, dates, amounts) — structured | |
| # constraints : Dict[str, Any] = Field(default_factory=dict) # filters like {"jurisdiction":"IN","incident_date":"2024-01-01"} | |
| # answer_type: Optional[str] = "detailed" | |
| # followups: Optional[List[str]] = Field(default_factory=list) # followups for user | |
| # @model_validator(mode = "before") | |
| # @classmethod | |
| # def parse_nested_json(cls, values): # parsing nested json to load | |
| # for field in ['entities', 'constraints']: | |
| # val = values.get(field) | |
| # if isinstance(val, str): | |
| # try: | |
| # values[field] = json.loads(val) | |
| # except json.JSONDecodeError: | |
| # pass | |
| # return values | |
| # class ClauseHit(BaseModel): | |
| # doc_id : str # id of the document | |
| # page: int # pdf page id | |
| # chunk_id: str | |
| # text: str # Evidence text used for answer. | |
| # metadata: Dict[str, Any] = Field(default_factory=dict) # metadata | |
| # score: float # Retrieval similarity score | |
| # boost: Optional[float] = None | |
| # combined_score: Optional[float] = None | |
| # @field_validator("metadata", mode="before") | |
| # def parse_metadata(cls, v): | |
| # if isinstance(v, str): | |
| # try: | |
| # return json.loads(v) if v.strip() else {} | |
| # except json.JSONDecodeError: | |
| # return {} | |
| # return v | |
| # class LogicResult(BaseModel): | |
| # answer: str | |
| # decision: str # "covered"/"not_covered"/"conditional" | |
| # confidence: float # 0..1 score for calibration/thresholding. | |
| # evidence: List[ClauseHit] = Field(default_factory=list) # List of ClauseHit used to justify the answer. | |
| # rationale: Optional[str] = None # Short human-readable reason (audit-friendly). | |
| # class HackRxRunRequest(BaseModel): | |
| # documents: HttpUrl = Field( | |
| # ..., | |
| # description="URL to the document (PDF, DOCX, or email blob)" | |
| # ) | |
| # questions: List[str] = Field( | |
| # ..., | |
| # description="List of questions to query against the document" | |
| # ) | |
# Model with one required field whose value must be exactly one of the six
# category strings listed in the Literal below.
# NOTE: documented with comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its JSON schema `description`.
class DocumentTypeSchema(BaseModel):
    # Despite the plural name, this field holds a single category string.
    document_types: Literal[
        "HR/Employment",
        "Insurance",
        "Legal/Compliance",
        "Financial/Regulatory",
        "Government/Public Policy",
        "Technical/IT Policies",
    ] = Field(
        ...,  # Ellipsis marks the field as required
        description="The category/type of the document",
    )