Spaces:
Runtime error
Runtime error
"""Distinct legislators extractor from Voteview congressional data.

This module extracts a deduplicated list of legislators from Voteview's
HSall_members data, aggregating congress sessions served for each legislator.

Unlike CSV→Parquet converters which perform lossless conversion, this module
performs aggregation (GROUP BY bioguide_id) and validates correct transformation
through three-tier validation:

- Tier 1: Completeness - every source bioguide_id appears exactly once
- Tier 2: Aggregation Integrity - MIN/MAX/LIST operations are correct
- Tier 3: Sample Verification - deep validation of random legislators

Example usage:
    from distinct_legislators import extract_distinct_legislators

    result = extract_distinct_legislators("legislators.parquet")
    print(f"Extracted {result.output_count:,} legislators")
"""

# Re-export the package's public surface from its submodules so callers can
# import everything from the package root.
from .exceptions import (
    AggregationError,
    CompletenessError,
    DistinctLegislatorsError,
    InvalidSourceURLError,
    OutputWriteError,
    SampleValidationError,
    SourceReadError,
)
from .extractor import ExtractionResult, extract_distinct_legislators
from .schema import (
    DISTINCT_LEGISLATORS_COLUMNS,
    DISTINCT_LEGISLATORS_SCHEMA,
    MIN_CONGRESS,
    VOTEVIEW_MEMBERS_URL,
    congress_to_years,
)
from .validators import ValidationResult

# Explicit public API. Kept as a plain list literal so that static tooling
# (linters, type checkers) can read it; every name listed here is imported
# above.
__all__ = [
    # Core functions
    "extract_distinct_legislators",
    "congress_to_years",
    # Data classes
    "ExtractionResult",
    "ValidationResult",
    # Schema
    "DISTINCT_LEGISLATORS_SCHEMA",
    "DISTINCT_LEGISLATORS_COLUMNS",
    "MIN_CONGRESS",
    "VOTEVIEW_MEMBERS_URL",
    # Exceptions
    "DistinctLegislatorsError",
    "InvalidSourceURLError",
    "SourceReadError",
    "CompletenessError",
    "AggregationError",
    "SampleValidationError",
    "OutputWriteError",
]