Spaces:
Running
Running
| """ | |
| Pydantic models for regulatory change detection. | |
| """ | |
| from typing import List, Optional | |
| import uuid | |
| from pydantic import BaseModel, Field, field_validator | |
class Grounding(BaseModel):
    """Page/line coordinates pinpointing where a change sits in the document."""

    # NOTE(review): both fields use ge=-2, so -1 and -2 are accepted even
    # though the descriptions say "zero-indexed" -- presumably sentinel
    # values; confirm with whatever produces these records.
    page: int = Field(
        ...,
        description="Zero-indexed page number",
        ge=-2,
    )
    line: int = Field(
        ...,
        description="Zero-indexed line number",
        ge=-2,
    )
class Action(BaseModel):
    """A follow-up task attached to a regulatory change.

    ``label`` is required; ``completed`` starts out ``False``.
    """

    label: str = Field(
        ...,
        description="Description of the action",
    )
    completed: bool = Field(
        False,
        description="Whether the action is completed",
    )
class RegulatoryChange(BaseModel):
    """Represents a detected regulatory change in a document.

    Only ``text`` is required. Every other field has a default, and
    ``chunk_id`` is filled with a fresh ``uuid4`` hex string when omitted.
    """

    text: str = Field(..., description="The text content of the change")

    # Review-workflow flags; all start out False.
    confirmed: bool = Field(default=False, description="Whether the change has been confirmed")
    validated: bool = Field(default=False, description="Whether the change has been validated")
    reviewed: bool = Field(default=False, description="Whether the change has been reviewed")

    # Classification; see validate_category / validate_type for the known values.
    category: str = Field(default="", description="Category of the change")
    type: str = Field(default="", description="Type of change (addition, modification, deletion)")
    context: str = Field(default="", description="Additional context about the change")

    grounding: List[Grounding] = Field(default_factory=list, description="Location references in the document")
    occurrence_index: Optional[int] = Field(default=None, description="Index of occurrence for duplicate text")
    actions: List[Action] = Field(default_factory=list, description="Actions from categorical impact assessment")
    chunk_id: str = Field(default_factory=lambda: uuid.uuid4().hex, description="Unique identifier for the change")
    repository: Optional[str] = Field(default=None, description="Repository associated with the change, for now for testing the RAG, not used later i think")

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        """Check ``category`` against the known categories without rejecting it.

        Args:
            v: The category value supplied for the field.

        Returns:
            ``v`` unchanged, whether or not it is a known category.
        """
        valid_categories = [
            "",
            "Select",
            "Compliance and Enforcement Changes",
            "Data and Field Changes",
            "Policy Changes",
            "Procedural Changes",
            "Regional-Specific Changes",
            "Sector-Specific Changes",
            "Stakeholder-Specific Changes",
            "Temporary Changes",
            "Textual and Editorial Changes",
        ]
        if v not in valid_categories:
            # Don't fail validation, just return as-is for backwards compatibility
            return v
        return v

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Check ``type`` against the known change types without rejecting it.

        Args:
            v: The type value supplied for the field.

        Returns:
            ``v`` unchanged, whether or not it is a known type.
        """
        valid_types = ["", "Select", "addition", "modification", "deletion", "unspecified"]
        if v not in valid_types:
            # Don't fail validation, just return as-is for backwards compatibility
            return v
        return v

    def to_dict(self) -> dict:
        """Convert model to dictionary format via ``model_dump()``."""
        return self.model_dump()

    @classmethod
    def from_dict(cls, data: dict) -> "RegulatoryChange":
        """Create a model from dictionary format.

        Args:
            data: Field values keyed by field name. Dicts written before the
                ``actions`` field existed are accepted. The caller's dict is
                not modified.

        Returns:
            A new instance built from ``data``.
        """
        # Work on a shallow copy so the fix-ups below never leak into the
        # caller's dictionary.
        payload = dict(data)

        # Handle old format without actions field
        payload.setdefault("actions", [])

        # Convert grounding list of dicts to Grounding objects; entries that
        # are not dicts are passed through untouched.
        raw_grounding = payload.get("grounding")
        if raw_grounding:
            payload["grounding"] = [
                Grounding(**g) if isinstance(g, dict) else g for g in raw_grounding
            ]

        return cls(**payload)
class CategoricalImpactEntry(BaseModel):
    """One row of the categorical impact assessment table.

    All string fields are required; ``actions`` defaults to an empty list and
    ``reviewed`` to ``False``.
    """

    id: str = Field(
        ...,
        description="Unique identifier for the entry",
    )
    reg_change: str = Field(
        ...,
        description="Regulation change reference",
    )
    category: str = Field(
        ...,
        description="Category of the change",
    )
    artifacts: str = Field(
        ...,
        description="Affected artifacts",
    )
    impacts: str = Field(
        ...,
        description="General impacts",
    )
    actions: List[Action] = Field(
        description="Action items",
        default_factory=list,
    )
    reviewed: bool = Field(
        False,
        description="Whether the entry has been reviewed",
    )
class ProductImpactEntry(BaseModel):
    """One row of the product impact assessment table.

    Every field is required; ``confidence`` must lie in the closed
    interval [0.0, 1.0].
    """

    id: str = Field(
        ...,
        description="Unique identifier for the entry",
    )
    reg_change: str = Field(
        ...,
        description="Regulation change reference",
    )
    requirement: str = Field(
        ...,
        description="Requirement description",
    )
    system_linked_component: str = Field(
        ...,
        description="System component affected",
    )
    location: str = Field(
        ...,
        description="Location in the system",
    )
    reason: str = Field(
        ...,
        description="Reason for the impact",
    )
    confidence: float = Field(
        ...,
        description="Confidence level (0-1)",
        ge=0.0,
        le=1.0,
    )