"""
Pydantic models for regulatory change detection.
"""

from typing import List, Optional
import uuid

from pydantic import BaseModel, Field, field_validator


# Known values for ``RegulatoryChange.category``. Kept for reference; the
# validator deliberately does not reject values outside this set.
VALID_CATEGORIES = (
    "",
    "Select",
    "Compliance and Enforcement Changes",
    "Data and Field Changes",
    "Policy Changes",
    "Procedural Changes",
    "Regional-Specific Changes",
    "Sector-Specific Changes",
    "Stakeholder-Specific Changes",
    "Temporary Changes",
    "Textual and Editorial Changes",
)

# Known values for ``RegulatoryChange.type``. Kept for reference; the
# validator deliberately does not reject values outside this set.
VALID_TYPES = ("", "Select", "addition", "modification", "deletion", "unspecified")


class Grounding(BaseModel):
    """Location information for a change in the document."""

    # NOTE(review): the lower bound of -2 admits -1 and -2 even though the
    # descriptions say "zero-indexed"; presumably these are sentinel values.
    # Confirm with the producers of this data before tightening to ge=0.
    page: int = Field(..., ge=-2, description="Zero-indexed page number")
    line: int = Field(..., ge=-2, description="Zero-indexed line number")


class Action(BaseModel):
    """Action item for a regulatory change."""

    label: str = Field(..., description="Description of the action")
    completed: bool = Field(default=False, description="Whether the action is completed")


class RegulatoryChange(BaseModel):
    """Represents a detected regulatory change in a document.

    Only ``text`` is required. ``chunk_id`` is generated automatically (a
    random UUID4 hex string) when not supplied.
    """

    text: str = Field(..., description="The text content of the change")
    confirmed: bool = Field(default=False, description="Whether the change has been confirmed")
    validated: bool = Field(default=False, description="Whether the change has been validated")
    reviewed: bool = Field(default=False, description="Whether the change has been reviewed")
    category: str = Field(default="", description="Category of the change")
    type: str = Field(default="", description="Type of change (addition, modification, deletion)")
    context: str = Field(default="", description="Additional context about the change")
    grounding: List[Grounding] = Field(default_factory=list, description="Location references in the document")
    occurrence_index: Optional[int] = Field(default=None, description="Index of occurrence for duplicate text")
    actions: List[Action] = Field(default_factory=list, description="Actions from categorical impact assessment")
    chunk_id: str = Field(default_factory=lambda: uuid.uuid4().hex, description="Unique identifier for the change")
    repository: Optional[str] = Field(default=None, description="Repository associated with the change, for now for testing the RAG, not used later i think")

    @field_validator('category')
    @classmethod
    def validate_category(cls, v: str) -> str:
        """Accept any category string and return it unchanged.

        The known categories are listed in ``VALID_CATEGORIES``, but values
        outside that set are intentionally not rejected, for backwards
        compatibility with previously stored data.
        """
        return v

    @field_validator('type')
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Accept any type string and return it unchanged.

        The known types are listed in ``VALID_TYPES``, but values outside
        that set are intentionally not rejected, for backwards compatibility
        with previously stored data.
        """
        return v

    def to_dict(self) -> dict:
        """Convert model to dictionary format (via ``model_dump``)."""
        return self.model_dump()

    @classmethod
    def from_dict(cls, data: dict) -> "RegulatoryChange":
        """Create a model from dictionary format.

        Args:
            data: Mapping of field names to values. Older payloads without an
                ``"actions"`` key are accepted. Entries of ``"grounding"``
                may be plain dicts or ``Grounding`` instances.

        Returns:
            A new ``RegulatoryChange`` built from ``data``.

        Raises:
            pydantic.ValidationError: If the values fail model validation.
        """
        # Work on a shallow copy so the caller's dict is never modified.
        payload = dict(data)

        # Handle old format without actions field. A missing "chunk_id" needs
        # no special handling: the field's default_factory generates one.
        payload.setdefault("actions", [])

        # Convert grounding list of dicts to Grounding objects; a new list is
        # built so the caller's list is left untouched as well.
        raw_grounding = payload.get("grounding")
        if raw_grounding:
            payload["grounding"] = [
                Grounding(**g) if isinstance(g, dict) else g
                for g in raw_grounding
            ]

        return cls(**payload)


class CategoricalImpactEntry(BaseModel):
    """Entry in the categorical impact assessment table."""

    id: str = Field(..., description="Unique identifier for the entry")
    reg_change: str = Field(..., description="Regulation change reference")
    category: str = Field(..., description="Category of the change")
    artifacts: str = Field(..., description="Affected artifacts")
    impacts: str = Field(..., description="General impacts")
    actions: List[Action] = Field(default_factory=list, description="Action items")
    reviewed: bool = Field(default=False, description="Whether the entry has been reviewed")


class ProductImpactEntry(BaseModel):
    """Entry in the product impact assessment table."""

    id: str = Field(..., description="Unique identifier for the entry")
    reg_change: str = Field(..., description="Regulation change reference")
    requirement: str = Field(..., description="Requirement description")
    system_linked_component: str = Field(..., description="System component affected")
    location: str = Field(..., description="Location in the system")
    reason: str = Field(..., description="Reason for the impact")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence level (0-1)")