| """Transaction models for reversible DataForge repairs.""" |
|
|
| from __future__ import annotations |
|
|
| import secrets |
| from datetime import UTC, datetime |
| from typing import Annotated, Literal |
|
|
| from pydantic import BaseModel, Field, field_validator |
|
|
# Transaction identifier string: ``txn-``, then three dash-separated digit
# groups of width 4, 2 and 2, then six lowercase hexadecimal characters.
TxnId = Annotated[
    str,
    Field(pattern=r"^txn-\d{4}-\d{2}-\d{2}-[0-9a-f]{6}$"),
]

# Digest string: exactly 64 lowercase hexadecimal characters.
Sha256Hex = Annotated[
    str,
    Field(pattern=r"^[0-9a-f]{64}$"),
]
|
|
|
|
| def _require_utc(value: datetime, field_name: str) -> datetime: |
| """Validate that a datetime is timezone-aware UTC.""" |
| if value.tzinfo is None or value.utcoffset() is None: |
| raise ValueError(f"{field_name} must be timezone-aware UTC") |
| if value.utcoffset() != UTC.utcoffset(value): |
| raise ValueError(f"{field_name} must be UTC") |
| return value |
|
|
|
|
| def generate_txn_id(now: datetime | None = None) -> str: |
| """Generate a transaction identifier in the canonical Week 2 format. |
| |
| Args: |
| now: Optional timestamp override. If omitted, current UTC time is used. |
| |
| Returns: |
| A transaction identifier like ``txn-2026-04-20-a1b2c3``. |
| """ |
| current = now or datetime.now(UTC) |
| current_utc = current.astimezone(UTC) |
| return f"txn-{current_utc:%Y-%m-%d}-{secrets.token_hex(3)}" |
|
|
|
|
class CellFix(BaseModel):
    """One cell-level mutation proposed or applied by DataForge.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
        row: Zero-indexed row number in the CSV body; must be >= 0.
        column: Name of the column to update; must be non-empty.
        old_value: The value observed before repair.
        new_value: The value to write during repair.
        detector_id: Non-empty identifier of the detector / repairer family
            that produced the fix.
        operation: Repair operation kind, either ``"update"`` (the default)
            or ``"delete_row"``.
    """

    model_config = {"frozen": True}

    row: int = Field(
        ge=0,
        description="Zero-indexed row number",
    )
    column: str = Field(
        min_length=1,
        description="Column name",
    )
    old_value: str = Field(
        description="Original value before repair",
    )
    new_value: str = Field(
        description="Replacement value after repair",
    )
    detector_id: str = Field(
        min_length=1,
        description="Detector / repairer identifier",
    )
    operation: Literal["update", "delete_row"] = Field(
        default="update",
        description="Repair operation kind",
    )
|
|
|
|
class RepairTransaction(BaseModel):
    """Audit record describing one reversible repair transaction.

    Instances are frozen: fields cannot be reassigned after construction.

    Attributes:
        txn_id: Canonical transaction identifier; must match the ``TxnId``
            pattern.
        created_at: Timestamp when the transaction was recorded; validated
            to be timezone-aware UTC.
        source_path: Path to the repaired source file. Intended to be
            absolute, but only non-emptiness is validated here.
        source_sha256: SHA-256 hex digest of the original source bytes.
        post_sha256: SHA-256 hex digest of the applied file bytes once
            written; ``None`` by default.
        source_snapshot_path: Path to the immutable source snapshot. Intended
            to be absolute, but only non-emptiness is validated here.
        fixes: Ordered list of cell fixes recorded for auditability; empty
            by default.
        applied: Whether the journal records that the repair was applied.
        reverted_at: Timestamp when the transaction was reverted, if any;
            validated to be timezone-aware UTC when not ``None``.
    """

    model_config = {"frozen": True}

    txn_id: TxnId
    created_at: datetime
    source_path: str = Field(min_length=1)
    source_sha256: Sha256Hex
    post_sha256: Sha256Hex | None = None
    source_snapshot_path: str = Field(min_length=1)
    fixes: list[CellFix] = Field(default_factory=list)
    applied: bool
    reverted_at: datetime | None = None

    @field_validator("created_at")
    @classmethod
    def _validate_created_at(cls, value: datetime) -> datetime:
        """Reject a ``created_at`` that is not timezone-aware UTC."""
        checked = _require_utc(value, "created_at")
        return checked

    @field_validator("reverted_at")
    @classmethod
    def _validate_reverted_at(cls, value: datetime | None) -> datetime | None:
        """Apply the UTC check to ``reverted_at`` unless it is ``None``."""
        return value if value is None else _require_utc(value, "reverted_at")
|
|