File size: 3,081 Bytes
b339b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""Exception hierarchy for distinct legislators extractor."""

from dataclasses import dataclass
from pathlib import Path


@dataclass
class DistinctLegislatorsError(Exception):
    """Base exception for distinct legislators extraction errors.

    Instances are dataclasses: every attribute is a constructor field, and
    subclasses extend the constructor by declaring further fields.
    """

    # Human-readable description of what went wrong.
    message: str

    def __post_init__(self) -> None:
        """Mirror every init field into ``Exception.args``.

        The dataclass-generated ``__init__`` does not call
        ``Exception.__init__``, so ``args`` would only contain whatever was
        passed positionally. ``copy`` and ``pickle`` rebuild an exception by
        calling ``type(exc)(*exc.args)``; after keyword construction ``args``
        is empty and that call raises ``TypeError``. Storing all field values
        in declaration order keeps instances copyable and picklable however
        they were constructed. Subclasses inherit this hook automatically.
        """
        # Local import keeps this class independent of the module import list.
        from dataclasses import fields

        super().__init__(*(getattr(self, f.name) for f in fields(self) if f.init))

    def __str__(self) -> str:
        """Return the bare message."""
        return self.message


@dataclass
class SourceReadError(DistinctLegislatorsError):
    """Signal that the source data could not be read.

    The rendered text is a headline naming ``source_url`` followed by
    ``message`` on the next line.
    """

    # Location of the source that failed to load.
    source_url: str

    def __str__(self) -> str:
        """Return the source headline and the message, newline-separated."""
        headline = f"Failed to read source: {self.source_url}"
        return f"{headline}\n{self.message}"


@dataclass
class CompletenessError(DistinctLegislatorsError):
    """Signal a failed completeness validation (Tier 1).

    The rendered text always reports the expected and actual counts; a
    preview of missing and/or extra IDs is appended when those lists are
    non-empty.
    """

    # Number of legislators that should have been produced.
    expected_count: int
    # Number of legislators that were actually produced.
    actual_count: int
    # IDs expected but absent; None or empty suppresses the "Missing" line.
    missing_ids: list[str] | None = None
    # IDs present but unexpected; None or empty suppresses the "Extra" line.
    extra_ids: list[str] | None = None

    def __str__(self) -> str:
        """Return a multi-line summary with at most five IDs per list."""
        lines = [
            f"Completeness validation failed: {self.message}",
            f"Expected {self.expected_count:,} legislators, got {self.actual_count:,}",
        ]
        for label, ids in (("Missing", self.missing_ids), ("Extra", self.extra_ids)):
            if not ids:
                continue
            # Only the first five IDs are shown; "..." marks a truncated list.
            suffix = "..." if len(ids) > 5 else ""
            lines.append(f"{label}: {ids[:5]}{suffix}")
        return "\n".join(lines)


@dataclass
class AggregationError(DistinctLegislatorsError):
    """Signal a failed aggregation validation (Tier 2).

    Carries the legislator and field involved together with the expected
    and actual values, all of which appear in the rendered text.
    """

    # Identifier of the legislator whose record failed validation.
    bioguide_id: str
    # Name of the field that did not match.
    field_name: str
    # Value the field was expected to hold.
    expected_value: str
    # Value the field actually held.
    actual_value: str

    def __str__(self) -> str:
        """Return a four-line report: headline, field, expected, actual."""
        report = (
            f"Aggregation validation failed for {self.bioguide_id}: {self.message}",
            f"Field: {self.field_name}",
            f"Expected: {self.expected_value}",
            f"Actual: {self.actual_value}",
        )
        return "\n".join(report)


@dataclass
class SampleValidationError(DistinctLegislatorsError):
    """Signal a failed sample validation (Tier 3).

    Carries the sample index, the legislator and field involved, and the
    expected and actual values, all of which appear in the rendered text.
    """

    # Identifier of the legislator whose sampled record failed validation.
    bioguide_id: str
    # Name of the field that did not match.
    field_name: str
    # Value the field was expected to hold.
    expected_value: str
    # Value the field actually held.
    actual_value: str
    # Index of the failing sample.
    sample_index: int

    def __str__(self) -> str:
        """Return a five-line report: headline, legislator, field, expected, actual."""
        report = (
            f"Sample validation failed at index {self.sample_index}: {self.message}",
            f"Legislator: {self.bioguide_id}",
            f"Field: {self.field_name}",
            f"Expected: {self.expected_value}",
            f"Actual: {self.actual_value}",
        )
        return "\n".join(report)


@dataclass
class OutputWriteError(DistinctLegislatorsError):
    """Signal that the output could not be written.

    The rendered text is a headline naming ``output_path`` followed by
    ``message`` on the next line.
    """

    # Destination that could not be written.
    output_path: Path

    def __str__(self) -> str:
        """Return the output-path headline and the message, newline-separated."""
        headline = f"Failed to write output: {self.output_path}"
        return f"{headline}\n{self.message}"


@dataclass
class InvalidSourceURLError(DistinctLegislatorsError):
    """Signal that a source URL is not from an allowed domain.

    Note that the rendered text contains only the URL and the allowed
    domains; ``message`` is stored on the instance but not included.
    """

    # URL that was rejected.
    source_url: str
    # Domains that would have been accepted.
    allowed_domains: list[str]

    def __str__(self) -> str:
        """Return the rejected URL and the comma-separated allowed domains."""
        allowed = ", ".join(self.allowed_domains)
        url_line = f"Invalid source URL: {self.source_url}"
        return f"{url_line}\nAllowed domains: {allowed}"