Hoe
Deploying Backend API
b339b93
"""Command-line interface for DIME converter."""
import argparse
import sys
from datetime import datetime
from pathlib import Path
from .converter import convert_dime_file
from .exceptions import DIMEConversionError
from .schema import FileType
def detect_file_type(filename: str) -> FileType:
    """Guess the DIME file type from keywords in the file name.

    Matching is case-insensitive. "recipient" is checked before
    "contributor"; a name containing neither is treated as contributions.
    """
    lowered = filename.lower()
    keyword_table = (
        ("recipient", FileType.RECIPIENTS),
        ("contributor", FileType.CONTRIBUTORS),
    )
    # Order matters: the first keyword found in the name wins.
    for keyword, matched_type in keyword_table:
        if keyword in lowered:
            return matched_type
    return FileType.CONTRIBUTIONS
def main(argv: "list[str] | None" = None) -> int:
    """Run the DIME converter command-line interface.

    Parses the command line, resolves the file type (the explicit
    ``--file-type`` value, otherwise auto-detected from the source file
    name), and calls ``convert_dime_file`` with the selected options.

    Args:
        argv: Argument strings to parse, excluding the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing ``main()`` callers behave exactly as before.

    Returns:
        ``0`` when the conversion succeeds, ``1`` when it raises
        ``DIMEConversionError``.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Convert DIME campaign finance CSV.gz files to Parquet format"
    )
    parser.add_argument(
        "source",
        type=Path,
        help="Source CSV.gz file path",
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Output Parquet file path",
    )
    parser.add_argument(
        "-t",
        "--file-type",
        choices=["contributions", "recipients", "contributors"],
        default=None,
        help="Type of DIME file (auto-detected from filename if not specified)",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip validation (not recommended)",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=1000,
        help="Number of rows to sample for validation (default: 1000)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=100_000,
        help="Rows per batch for streaming conversion (default: 100000)",
    )
    # Passing argv through lets tests and other Python code drive the CLI
    # without mutating sys.argv; None keeps the normal command-line behavior.
    args = parser.parse_args(argv)

    # Determine file type: an explicit flag wins over filename detection.
    if args.file_type:
        file_type = FileType(args.file_type)
    else:
        file_type = detect_file_type(args.source.name)

    print(f"[{datetime.now().isoformat()}] Converting: {args.source.name}")
    print(f" File type: {file_type.value}")

    try:
        result = convert_dime_file(
            args.source,
            args.output,
            file_type,
            validate=not args.no_validate,
            sample_size=args.sample_size,
            batch_size=args.batch_size,
        )
        print(f"[{datetime.now().isoformat()}] SUCCESS: {result.row_count:,} rows")
        return 0
    except DIMEConversionError as e:
        # Report conversion failures on stderr and signal them via exit code.
        print(f"ERROR: {e}", file=sys.stderr)
        return 1
def __main__():
    """Run the CLI and exit the interpreter with the status code from ``main``."""
    exit_code = main()
    sys.exit(exit_code)
if __name__ == "__main__":
    # Executed as a script: propagate main()'s return value as the exit status.
    raise SystemExit(main())