File size: 1,864 Bytes
b339b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""Distinct legislators extractor from Voteview congressional data.



This module extracts a deduplicated list of legislators from Voteview's
HSall_members data, aggregating congress sessions served for each legislator.

Unlike CSV→Parquet converters, which perform lossless conversion, this module
performs aggregation (GROUP BY bioguide_id) and checks that the transformation
is correct through three-tier validation:

- Tier 1: Completeness - every source bioguide_id appears exactly once
- Tier 2: Aggregation Integrity - MIN/MAX/LIST operations are correct
- Tier 3: Sample Verification - deep validation of random legislators

Example usage:
    from distinct_legislators import extract_distinct_legislators

    result = extract_distinct_legislators("legislators.parquet")
    print(f"Extracted {result.output_count:,} legislators")
"""

from .exceptions import (
    AggregationError,
    CompletenessError,
    DistinctLegislatorsError,
    InvalidSourceURLError,
    OutputWriteError,
    SampleValidationError,
    SourceReadError,
)
from .extractor import ExtractionResult, extract_distinct_legislators
from .schema import (
    DISTINCT_LEGISLATORS_COLUMNS,
    DISTINCT_LEGISLATORS_SCHEMA,
    MIN_CONGRESS,
    VOTEVIEW_MEMBERS_URL,
    congress_to_years,
)
from .validators import ValidationResult

# Public API of this package: the names bound by ``from <package> import *``.
# Every entry below is imported from a submodule at the top of this file, so
# adding a name here requires a matching import there.
__all__ = [
    # Core functions (from .extractor and .schema)
    "extract_distinct_legislators",
    "congress_to_years",
    # Data classes (from .extractor and .validators)
    "ExtractionResult",
    "ValidationResult",
    # Schema (from .schema)
    "DISTINCT_LEGISLATORS_SCHEMA",
    "DISTINCT_LEGISLATORS_COLUMNS",
    "MIN_CONGRESS",
    "VOTEVIEW_MEMBERS_URL",
    # Exceptions (from .exceptions)
    "DistinctLegislatorsError",
    "InvalidSourceURLError",
    "SourceReadError",
    "CompletenessError",
    "AggregationError",
    "SampleValidationError",
    "OutputWriteError",
]