Spaces:
Runtime error
Runtime error
| """Command-line interface for legislator crosswalk extractor.""" | |
| from __future__ import annotations | |
| import argparse | |
| import sys | |
| from datetime import datetime | |
| from pathlib import Path | |
| from .exceptions import CrosswalkError | |
| from .extractor import extract_crosswalk | |
| from .schema import DIME_RECIPIENTS_URL | |
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the crosswalk extractor CLI."""
    parser = argparse.ArgumentParser(
        description="Extract legislator-recipient crosswalk from DIME Recipients data",
        # RawDescriptionHelpFormatter prints the epilog verbatim, so the
        # example lines keep their own line breaks in --help output.
        epilog=(
            "\n"
            "Examples:\n"
            "%(prog)s crosswalk.parquet\n"
            "%(prog)s crosswalk.parquet --no-validate\n"
            "%(prog)s crosswalk.parquet --sample-size 200\n"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Output Parquet file path",
    )
    parser.add_argument(
        "--source-url",
        type=str,
        default=DIME_RECIPIENTS_URL,
        help="Source parquet URL (default: HuggingFace DIME Recipients)",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip validation (not recommended)",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=100,
        help="Sample size for validation (default: 100)",
    )
    return parser


def _print_summary(result, *, validated: bool) -> None:
    """Print the success report for a finished extraction to stdout.

    Args:
        result: Object returned by ``extract_crosswalk``. Only the attributes
            ``output_path``, ``output_count``, ``unique_icpsr_count`` and
            ``unique_bonica_rid_count`` are read here.
        validated: Whether validation was requested for this run; selects
            the final "Validation" line.
    """
    print(f"\n[{datetime.now().isoformat()}] SUCCESS")
    print(f" Output: {result.output_path}")
    print(f" Size: {result.output_path.stat().st_size / 1024:.1f} KB")
    print(f" Crosswalk rows: {result.output_count:,}")
    print(f" Unique legislators (ICPSR): {result.unique_icpsr_count:,}")
    print(f" Unique recipients (bonica_rid): {result.unique_bonica_rid_count:,}")
    # Guard the ratio so a result with no legislators does not divide by zero.
    if result.unique_icpsr_count > 0:
        avg = result.unique_bonica_rid_count / result.unique_icpsr_count
        print(f" Avg recipients per legislator: {avg:.1f}")
    if validated:
        print(" Validation: ALL PASSED")
    else:
        print(" Validation: SKIPPED")


def main(argv: list[str] | None = None) -> int:
    """Main entry point for the CLI.

    Args:
        argv: Command-line arguments without the program name. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which preserves
            the previous behavior; passing a list lets callers and tests
            drive the CLI programmatically.

    Returns:
        ``0`` when the extraction and summary complete, ``1`` when a
        ``CrosswalkError`` is raised (the message is written to stderr).

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    args = _build_parser().parse_args(argv)
    print(f"[{datetime.now().isoformat()}] Extracting legislator-recipient crosswalk")
    try:
        result = extract_crosswalk(
            args.output,
            source_url=args.source_url,
            validate=not args.no_validate,
            sample_size=args.sample_size,
        )
        # Reporting stays inside the try so a CrosswalkError surfacing while
        # reading the result is still reported as a failure, as before.
        _print_summary(result, validated=not args.no_validate)
        return 0
    except CrosswalkError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())