Spaces:
Runtime error
Runtime error
| """Command-line interface for DIME converter.""" | |
| import argparse | |
| import sys | |
| from datetime import datetime | |
| from pathlib import Path | |
| from .converter import convert_dime_file | |
| from .exceptions import DIMEConversionError | |
| from .schema import FileType | |
def detect_file_type(filename: str) -> FileType:
    """Guess the DIME file type from keywords in *filename*.

    The match is case-insensitive and uses substring containment. Keywords
    are tried in order, so a name containing "recipient" is classified as
    recipients before "contributor" is considered. Names matching neither
    keyword are treated as contributions.

    Args:
        filename: File name (not necessarily a full path) to inspect.

    Returns:
        The ``FileType`` member selected by the first matching keyword, or
        ``FileType.CONTRIBUTIONS`` when no keyword matches.
    """
    lowered = filename.lower()
    # Order matters: it mirrors the precedence of the keyword checks.
    keyword_table = (
        ("recipient", FileType.RECIPIENTS),
        ("contributor", FileType.CONTRIBUTORS),
    )
    for keyword, kind in keyword_table:
        if keyword in lowered:
            return kind
    return FileType.CONTRIBUTIONS
def _positive_int(text: str) -> int:
    """argparse ``type`` callback that accepts only integers >= 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def main(argv: "list[str] | None" = None) -> int:
    """Parse command-line arguments and convert one DIME file to Parquet.

    Args:
        argv: Argument strings to parse, excluding the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing callers that invoke ``main()`` are unaffected.

    Returns:
        ``0`` when the conversion succeeds, ``1`` when ``convert_dime_file``
        raises ``DIMEConversionError`` (the message is written to stderr).

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments,
            including a ``--batch-size`` smaller than 1.
    """
    parser = argparse.ArgumentParser(
        description="Convert DIME campaign finance CSV.gz files to Parquet format"
    )
    parser.add_argument(
        "source",
        type=Path,
        help="Source CSV.gz file path",
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Output Parquet file path",
    )
    parser.add_argument(
        "-t",
        "--file-type",
        choices=["contributions", "recipients", "contributors"],
        default=None,
        help="Type of DIME file (auto-detected from filename if not specified)",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip validation (not recommended)",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=1000,
        help="Number of rows to sample for validation (default: 1000)",
    )
    parser.add_argument(
        "--batch-size",
        # Reject zero/negative batch sizes up front with a usage error
        # instead of handing a meaningless value to the converter.
        type=_positive_int,
        default=100_000,
        help="Rows per batch for streaming conversion (default: 100000)",
    )
    args = parser.parse_args(argv)

    # An explicit --file-type wins; otherwise guess from the source file name.
    if args.file_type:
        file_type = FileType(args.file_type)
    else:
        file_type = detect_file_type(args.source.name)

    print(f"[{datetime.now().isoformat()}] Converting: {args.source.name}")
    print(f" File type: {file_type.value}")

    # Keep the try body to the one call whose failure is reported as exit 1.
    try:
        result = convert_dime_file(
            args.source,
            args.output,
            file_type,
            validate=not args.no_validate,
            sample_size=args.sample_size,
            batch_size=args.batch_size,
        )
    except DIMEConversionError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1
    else:
        print(f"[{datetime.now().isoformat()}] SUCCESS: {result.row_count:,} rows")
        return 0
def __main__():
    """Run the CLI and terminate the interpreter with its return code."""
    exit_status = main()
    sys.exit(exit_status)
# Script entry point: hand main()'s return value to the shell as the
# process exit status when this module is executed directly.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)