"""
Pydantic models for regulatory change detection.
"""
from typing import List, Optional
import uuid
from pydantic import BaseModel, Field, field_validator
class Grounding(BaseModel):
    """Page/line coordinates that anchor a change inside the document.

    Both fields are required integers validated to be at least -2.

    NOTE(review): the descriptions say "zero-indexed", yet the lower bound
    admits -1 and -2 -- presumably sentinel values; confirm with whatever
    produces these coordinates.
    """

    page: int = Field(
        ...,
        description="Zero-indexed page number",
        ge=-2,
    )
    line: int = Field(
        ...,
        description="Zero-indexed line number",
        ge=-2,
    )
class Action(BaseModel):
    """A single follow-up task attached to a regulatory change.

    ``label`` is required; ``completed`` starts out as ``False``.
    """

    label: str = Field(
        ...,
        description="Description of the action",
    )
    completed: bool = Field(
        default=False,
        description="Whether the action is completed",
    )
class RegulatoryChange(BaseModel):
    """Represents a detected regulatory change in a document.

    Only ``text`` is required. Every other field has a default, so records
    written by older versions of the application (which may lack newer
    keys) can still be loaded.
    """

    text: str = Field(..., description="The text content of the change")

    # Independent workflow flags; all start out False.
    confirmed: bool = Field(
        default=False, description="Whether the change has been confirmed"
    )
    validated: bool = Field(
        default=False, description="Whether the change has been validated"
    )
    reviewed: bool = Field(
        default=False, description="Whether the change has been reviewed"
    )

    # Free-form strings: see validate_category / validate_type, which do
    # not reject unknown values.
    category: str = Field(default="", description="Category of the change")
    type: str = Field(
        default="",
        description="Type of change (addition, modification, deletion)",
    )
    context: str = Field(
        default="", description="Additional context about the change"
    )

    grounding: List[Grounding] = Field(
        default_factory=list,
        description="Location references in the document",
    )
    occurrence_index: Optional[int] = Field(
        default=None, description="Index of occurrence for duplicate text"
    )
    actions: List[Action] = Field(
        default_factory=list,
        description="Actions from categorical impact assessment",
    )
    # A fresh random hex id is generated whenever the caller supplies none.
    chunk_id: str = Field(
        default_factory=lambda: uuid.uuid4().hex,
        description="Unique identifier for the change",
    )
    repository: Optional[str] = Field(
        default=None,
        description="Repository associated with the change, for now for testing the RAG, not used later i think",
    )

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        """Return the category unchanged.

        Unknown categories are deliberately NOT rejected, for backwards
        compatibility with previously stored data. The values the
        application currently knows about are:

        - "" (empty) and "Select"
        - "Compliance and Enforcement Changes"
        - "Data and Field Changes"
        - "Policy Changes"
        - "Procedural Changes"
        - "Regional-Specific Changes"
        - "Sector-Specific Changes"
        - "Stakeholder-Specific Changes"
        - "Temporary Changes"
        - "Textual and Editorial Changes"

        Args:
            v: The category value supplied for the field.

        Returns:
            ``v``, exactly as received.
        """
        return v

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Return the change type unchanged.

        Unknown types are deliberately NOT rejected, for backwards
        compatibility. The values the application currently knows about
        are: "", "Select", "addition", "modification", "deletion" and
        "unspecified".

        Args:
            v: The type value supplied for the field.

        Returns:
            ``v``, exactly as received.
        """
        return v

    def to_dict(self) -> dict:
        """Convert the model to a plain dictionary via ``model_dump()``."""
        return self.model_dump()

    @classmethod
    def from_dict(cls, data: dict) -> "RegulatoryChange":
        """Create a model from its dictionary form.

        The input mapping is left untouched: all normalisation happens on
        a shallow copy, so callers can keep using (or re-serialising)
        their original dict afterwards.

        Args:
            data: Field values keyed by field name. A missing ``"actions"``
                key (old record format) is treated as an empty list.
                ``"grounding"`` entries may be dicts or ``Grounding``
                instances.

        Returns:
            The constructed ``RegulatoryChange``.

        Raises:
            pydantic.ValidationError: If the values do not satisfy the
                model's field definitions.
        """
        # Work on a copy so the caller's dict is never mutated.
        payload = dict(data)

        # Old-format records have no "actions" key.
        payload.setdefault("actions", [])

        # Turn raw grounding dicts into Grounding objects; entries that are
        # not dicts are passed through as they are.
        raw_grounding = payload.get("grounding")
        if raw_grounding:
            payload["grounding"] = [
                Grounding(**item) if isinstance(item, dict) else item
                for item in raw_grounding
            ]

        return cls(**payload)
class CategoricalImpactEntry(BaseModel):
    """One row of the categorical impact assessment table.

    The five string fields are required. ``actions`` defaults to an empty
    list and ``reviewed`` to ``False``.
    """

    id: str = Field(
        ...,
        description="Unique identifier for the entry",
    )
    reg_change: str = Field(
        ...,
        description="Regulation change reference",
    )
    category: str = Field(
        ...,
        description="Category of the change",
    )
    artifacts: str = Field(
        ...,
        description="Affected artifacts",
    )
    impacts: str = Field(
        ...,
        description="General impacts",
    )
    actions: List[Action] = Field(
        default_factory=list,
        description="Action items",
    )
    reviewed: bool = Field(
        default=False,
        description="Whether the entry has been reviewed",
    )
class ProductImpactEntry(BaseModel):
    """One row of the product impact assessment table.

    Every field is required. ``confidence`` is validated to lie in the
    closed interval [0.0, 1.0].
    """

    id: str = Field(
        ...,
        description="Unique identifier for the entry",
    )
    reg_change: str = Field(
        ...,
        description="Regulation change reference",
    )
    requirement: str = Field(
        ...,
        description="Requirement description",
    )
    system_linked_component: str = Field(
        ...,
        description="System component affected",
    )
    location: str = Field(
        ...,
        description="Location in the system",
    )
    reason: str = Field(
        ...,
        description="Reason for the impact",
    )
    confidence: float = Field(
        ...,
        description="Confidence level (0-1)",
        ge=0.0,
        le=1.0,
    )
|