File size: 2,014 Bytes
b339b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Exception hierarchy for legislator crosswalk extractor."""

from dataclasses import dataclass, fields
from pathlib import Path


@dataclass
class CrosswalkError(Exception):
    """Base exception for crosswalk extraction errors.

    Declared as a dataclass so subclasses can attach typed context fields.
    Every field is mirrored into ``args`` so an instance can be rebuilt with
    ``type(exc)(*exc.args)`` (which is what ``copy`` and ``pickle`` do),
    whether it was created with positional or keyword arguments.

    Attributes:
        message: Human-readable description of the failure.

    Note:
        The dataclass-generated ``__eq__`` compares instances field by
        field, which also leaves instances unhashable.
    """

    message: str

    def __post_init__(self) -> None:
        """Mirror all dataclass fields into ``args`` in declaration order."""
        # BaseException.__new__ records only *positional* arguments and the
        # generated __init__ never calls Exception.__init__, so keyword
        # construction would otherwise leave ``args`` empty and break
        # copy/pickle round-trips.
        self.args = tuple(getattr(self, spec.name) for spec in fields(self))

    def __str__(self) -> str:
        """Return the message text on its own."""
        return self.message


@dataclass
class SourceReadError(CrosswalkError):
    """Signals that the source data could not be read.

    Attributes:
        source_url: The source that failed to be read.
    """

    source_url: str

    def __str__(self) -> str:
        """Render the failing source on one line and the message on the next."""
        headline = f"Failed to read source: {self.source_url}"
        return f"{headline}\n{self.message}"


@dataclass
class InvalidSourceURLError(CrosswalkError):
    """Signals that a source URL does not belong to an allowed domain.

    Attributes:
        source_url: The URL that was rejected.
        allowed_domains: Domains that are accepted, listed in the output.

    Note:
        The inherited ``message`` field is not part of the rendered text.
    """

    source_url: str
    allowed_domains: list[str]

    def __str__(self) -> str:
        """Render the rejected URL followed by the comma-separated allow-list."""
        allowed = ", ".join(self.allowed_domains)
        url_line = f"Invalid source URL: {self.source_url}"
        return f"{url_line}\nAllowed domains: {allowed}"


@dataclass
class OutputWriteError(CrosswalkError):
    """Signals that the output could not be written.

    Attributes:
        output_path: Destination path that could not be written.
    """

    output_path: Path

    def __str__(self) -> str:
        """Render the target path on one line and the message on the next."""
        headline = f"Failed to write output: {self.output_path}"
        return f"{headline}\n{self.message}"


@dataclass
class ValidationError(CrosswalkError):
    """Signals that a validation check did not pass.

    Attributes:
        expected_count: The count the check required.
        actual_count: The count that was actually observed.
    """

    expected_count: int
    actual_count: int

    def __str__(self) -> str:
        """Render the message plus both counts, one item per line.

        Counts are formatted with thousands separators.
        """
        report_lines = [
            f"Validation failed: {self.message}",
            f"Expected: {self.expected_count:,}",
            f"Actual: {self.actual_count:,}",
        ]
        return "\n".join(report_lines)


@dataclass
class DuplicateKeyError(CrosswalkError):
    """Signals that duplicate key pairs were detected.

    Attributes:
        duplicate_count: Number of duplicate pairs reported in the summary.
        sample_duplicates: Optional example pairs; at most the first five
            are shown in the rendered text.

    Note:
        The inherited ``message`` field is not part of the rendered text.
    """

    duplicate_count: int
    sample_duplicates: list[tuple[str, str]] | None = None

    def __str__(self) -> str:
        """Render the duplicate count, plus up to five example pairs if any."""
        summary = f"Found {self.duplicate_count:,} duplicate (icpsr, bonica_rid) pairs"
        # None and an empty list both mean there is nothing to illustrate.
        if not self.sample_duplicates:
            return summary
        shown = [f"({icpsr}, {rid})" for icpsr, rid in self.sample_duplicates[:5]]
        examples = ", ".join(shown)
        return f"{summary}\nExamples: {examples}"