Spaces:
Runtime error
Runtime error
File size: 1,864 Bytes
b339b93 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | """Distinct legislators extractor from Voteview congressional data.
This module extracts a deduplicated list of legislators from Voteview's
HSall_members data, aggregating congress sessions served for each legislator.
Unlike CSV→Parquet converters, which perform a lossless conversion, this module
performs an aggregation (GROUP BY bioguide_id) and verifies that the
transformation is correct through three tiers of validation:
- Tier 1: Completeness - every source bioguide_id appears exactly once
- Tier 2: Aggregation Integrity - MIN/MAX/LIST operations are correct
- Tier 3: Sample Verification - deep validation of random legislators
Example usage:
    from distinct_legislators import extract_distinct_legislators

    result = extract_distinct_legislators("legislators.parquet")
    print(f"Extracted {result.output_count:,} legislators")
"""
from .exceptions import (
AggregationError,
CompletenessError,
DistinctLegislatorsError,
InvalidSourceURLError,
OutputWriteError,
SampleValidationError,
SourceReadError,
)
from .extractor import ExtractionResult, extract_distinct_legislators
from .schema import (
DISTINCT_LEGISLATORS_COLUMNS,
DISTINCT_LEGISLATORS_SCHEMA,
MIN_CONGRESS,
VOTEVIEW_MEMBERS_URL,
congress_to_years,
)
from .validators import ValidationResult
# Public API of the package: the names exported by a star-import.
# Every entry is re-exported from one of the submodules imported above.
__all__ = [
    # Entry points and helper functions
    "extract_distinct_legislators",
    "congress_to_years",
    # Result types
    "ExtractionResult",
    "ValidationResult",
    # Schema definitions and constants
    "DISTINCT_LEGISLATORS_SCHEMA",
    "DISTINCT_LEGISLATORS_COLUMNS",
    "MIN_CONGRESS",
    "VOTEVIEW_MEMBERS_URL",
    # Exception classes
    "DistinctLegislatorsError",
    "InvalidSourceURLError",
    "SourceReadError",
    "CompletenessError",
    "AggregationError",
    "SampleValidationError",
    "OutputWriteError",
]
|