Hoe
Deploying Backend API
b339b93
"""Distinct legislators extractor from Voteview congressional data.
This module extracts a deduplicated list of legislators from Voteview's
HSall_members data, aggregating congress sessions served for each legislator.
Unlike CSV→Parquet converters which perform lossless conversion, this module
performs aggregation (GROUP BY bioguide_id) and validates correct transformation
through three-tier validation:
- Tier 1: Completeness - every source bioguide_id appears exactly once
- Tier 2: Aggregation Integrity - MIN/MAX/LIST operations are correct
- Tier 3: Sample Verification - deep validation of random legislators
Example usage:
from distinct_legislators import extract_distinct_legislators
result = extract_distinct_legislators("legislators.parquet")
print(f"Extracted {result.output_count:,} legislators")
"""
from .exceptions import (
AggregationError,
CompletenessError,
DistinctLegislatorsError,
InvalidSourceURLError,
OutputWriteError,
SampleValidationError,
SourceReadError,
)
from .extractor import ExtractionResult, extract_distinct_legislators
from .schema import (
DISTINCT_LEGISLATORS_COLUMNS,
DISTINCT_LEGISLATORS_SCHEMA,
MIN_CONGRESS,
VOTEVIEW_MEMBERS_URL,
congress_to_years,
)
from .validators import ValidationResult
__all__ = [
# Core functions
"extract_distinct_legislators",
"congress_to_years",
# Data classes
"ExtractionResult",
"ValidationResult",
# Schema
"DISTINCT_LEGISLATORS_SCHEMA",
"DISTINCT_LEGISLATORS_COLUMNS",
"MIN_CONGRESS",
"VOTEVIEW_MEMBERS_URL",
# Exceptions
"DistinctLegislatorsError",
"InvalidSourceURLError",
"SourceReadError",
"CompletenessError",
"AggregationError",
"SampleValidationError",
"OutputWriteError",
]