Spaces:
Runtime error
Runtime error
| """Command-line interface for Voteview CSV to Parquet converter.""" | |
import argparse
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional, Sequence

from .converter import convert_voteview_file
from .exceptions import VoteviewConversionError
from .schema import FileType
def detect_file_type(filename: str) -> FileType:
    """
    Auto-detect the Voteview file type from a filename.

    Matching is case-insensitive and substring-based, checked in this
    order (the first hit wins):

    - "member"   -> FileType.MEMBERS    (e.g. HSall_members.csv)
    - "rollcall" -> FileType.ROLLCALLS  (e.g. HSall_rollcalls.csv)
    - "vote"     -> FileType.VOTES      (e.g. HSall_votes.csv)

    The dataset name "voteview" is ignored when looking for "vote", so a
    file such as ``voteview_parties.csv`` is reported as undetectable
    instead of being silently treated as a votes file.

    Args:
        filename: File name (not a full path) to inspect.

    Returns:
        The detected FileType.

    Raises:
        ValueError: If none of the known patterns occur in the name.
    """
    name_lower = filename.lower()
    if "member" in name_lower:
        return FileType.MEMBERS
    if "rollcall" in name_lower:
        return FileType.ROLLCALLS
    # "voteview" contains "vote"; strip it so the project name alone does
    # not count as evidence that the file holds votes.
    if "vote" in name_lower.replace("voteview", ""):
        return FileType.VOTES
    raise ValueError(f"Cannot auto-detect file type from: {filename}")
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the converter command line."""
    parser = argparse.ArgumentParser(
        description="Convert Voteview CSV files to Parquet format with validation",
        epilog="""
Examples:
  %(prog)s HSall_members.csv members.parquet
  %(prog)s HSall_rollcalls.csv rollcalls.parquet
  %(prog)s HSall_votes.csv votes.parquet --batch-size 200000
  %(prog)s input.csv output.parquet -t votes
        """,
        # Keep the hand-formatted example block exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "source",
        type=Path,
        help="Source CSV file path",
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Output Parquet file path",
    )
    parser.add_argument(
        "-t",
        "--file-type",
        choices=["members", "rollcalls", "votes"],
        default=None,
        help="Type of Voteview file (auto-detected from filename if not specified)",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip validation (not recommended)",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=None,
        help="Number of rows to sample for validation (uses type default if not specified)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=100_000,
        help="Rows per batch for streaming conversion (default: 100000)",
    )
    return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """
    Main entry point for the CLI.

    Args:
        argv: Command-line arguments, excluding the program name. When
            None (the default), argparse reads them from ``sys.argv``.

    Returns:
        0 when the conversion succeeds; 1 when the source path is missing
        or is a directory, the file type cannot be auto-detected, or the
        conversion raises VoteviewConversionError.

    Raises:
        SystemExit: Raised by argparse for invalid arguments (including a
            non-positive ``--batch-size``) and for ``-h/--help``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # A batch must contain at least one row; reject bad values up front
    # with a usage error rather than failing somewhere inside the converter.
    if args.batch_size < 1:
        parser.error("--batch-size must be a positive integer")

    # Validate source exists
    if not args.source.exists():
        print(f"ERROR: Source file not found: {args.source}", file=sys.stderr)
        return 1
    # exists() is also true for directories, which cannot be converted.
    if args.source.is_dir():
        print(f"ERROR: Source path is a directory: {args.source}", file=sys.stderr)
        return 1

    # Determine file type
    if args.file_type:
        file_type = FileType(args.file_type)
    else:
        try:
            file_type = detect_file_type(args.source.name)
        except ValueError as e:
            print(f"ERROR: {e}", file=sys.stderr)
            print("Use -t/--file-type to specify explicitly", file=sys.stderr)
            return 1

    print(f"[{datetime.now().isoformat()}] Converting: {args.source.name}")
    print(f" File type: {file_type.value}")

    try:
        result = convert_voteview_file(
            args.source,
            args.output,
            file_type,
            validate=not args.no_validate,
            sample_size=args.sample_size,
            batch_size=args.batch_size,
        )
    except VoteviewConversionError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    print(f"[{datetime.now().isoformat()}] SUCCESS: {result.row_count:,} rows")
    print(f" Output: {result.output_path}")
    if args.no_validate:
        print(" Validation: SKIPPED")
    else:
        print(" Validation: ALL PASSED")
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())