File size: 3,722 Bytes
5143557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""Transaction models for reversible DataForge repairs."""

from __future__ import annotations

import secrets
from datetime import UTC, datetime
from typing import Annotated, Literal

from pydantic import BaseModel, Field, field_validator

# Transaction identifier string: the literal prefix "txn-", three dash-separated
# digit groups of width 4, 2 and 2 (a YYYY-MM-DD-shaped date), then a dash and
# exactly six lowercase hexadecimal characters. The pattern only checks digit
# counts; it does not verify that the digits form a real calendar date.
TxnId = Annotated[str, Field(pattern=r"^txn-\d{4}-\d{2}-\d{2}-[0-9a-f]{6}$")]
# SHA-256 digest rendered as exactly 64 lowercase hexadecimal characters.
Sha256Hex = Annotated[str, Field(pattern=r"^[0-9a-f]{64}$")]


def _require_utc(value: datetime, field_name: str) -> datetime:
    """Validate that a datetime is timezone-aware UTC."""
    if value.tzinfo is None or value.utcoffset() is None:
        raise ValueError(f"{field_name} must be timezone-aware UTC")
    if value.utcoffset() != UTC.utcoffset(value):
        raise ValueError(f"{field_name} must be UTC")
    return value


def generate_txn_id(now: datetime | None = None) -> str:
    """Generate a transaction identifier in the canonical Week 2 format.

    Args:
        now: Optional timestamp override. If omitted, current UTC time is used.

    Returns:
        A transaction identifier like ``txn-2026-04-20-a1b2c3``.
    """
    current = now or datetime.now(UTC)
    current_utc = current.astimezone(UTC)
    return f"txn-{current_utc:%Y-%m-%d}-{secrets.token_hex(3)}"


class CellFix(BaseModel):
    """One cell-level repair that DataForge has proposed or applied.

    Attributes:
        row: Zero-indexed row number in the CSV body; must be at least 0.
        column: Name of the column to update; must be non-empty.
        old_value: The value observed before repair.
        new_value: The value to write during repair.
        detector_id: The detector / repairer family that produced the fix;
            must be non-empty.
        operation: Repair operation kind, either ``"update"`` (the default)
            or ``"delete_row"``.
    """

    # Frozen configuration: field assignment after construction is rejected.
    model_config = {"frozen": True}

    row: Annotated[int, Field(ge=0, description="Zero-indexed row number")]
    column: Annotated[str, Field(min_length=1, description="Column name")]
    old_value: Annotated[str, Field(description="Original value before repair")]
    new_value: Annotated[str, Field(description="Replacement value after repair")]
    detector_id: Annotated[
        str,
        Field(min_length=1, description="Detector / repairer identifier"),
    ]
    operation: Annotated[
        Literal["update", "delete_row"],
        Field(description="Repair operation kind"),
    ] = "update"


class RepairTransaction(BaseModel):
    """Journal entry describing one reversible repair transaction.

    Attributes:
        txn_id: Canonical transaction identifier.
        created_at: UTC timestamp when the transaction was recorded.
        source_path: Path to the repaired source file (documented as absolute;
            only non-emptiness is enforced here).
        source_sha256: SHA-256 of the original source bytes.
        post_sha256: SHA-256 of the applied file bytes, once written;
            ``None`` until then.
        source_snapshot_path: Path to the immutable source snapshot
            (documented as absolute; only non-emptiness is enforced here).
        fixes: Ordered list of cell fixes recorded for auditability;
            defaults to an empty list.
        applied: Whether the journal records that the repair was applied.
        reverted_at: UTC timestamp when the transaction was reverted, or
            ``None`` if it has not been reverted.
    """

    # Frozen configuration: field assignment after construction is rejected.
    model_config = {"frozen": True}

    txn_id: TxnId
    created_at: datetime
    source_path: Annotated[str, Field(min_length=1)]
    source_sha256: Sha256Hex
    post_sha256: Sha256Hex | None = None
    source_snapshot_path: Annotated[str, Field(min_length=1)]
    fixes: list[CellFix] = Field(default_factory=list)
    applied: bool
    reverted_at: datetime | None = None

    @field_validator("created_at")
    @classmethod
    def _validate_created_at(cls, value: datetime) -> datetime:
        """Reject a ``created_at`` that is naive or not at a UTC offset."""
        checked = _require_utc(value, "created_at")
        return checked

    @field_validator("reverted_at")
    @classmethod
    def _validate_reverted_at(cls, value: datetime | None) -> datetime | None:
        """Reject a non-``None`` ``reverted_at`` that is naive or not UTC."""
        # An absent revert timestamp is valid and passes through untouched.
        return None if value is None else _require_utc(value, "reverted_at")