Hoe
Deploying Backend API
b339b93
"""Command-line interface for Voteview CSV to Parquet converter."""
import argparse
import sys
from datetime import datetime
from pathlib import Path
from .converter import convert_voteview_file
from .exceptions import VoteviewConversionError
from .schema import FileType
def detect_file_type(filename: str) -> FileType:
    """
    Auto-detect the Voteview file type from a filename.

    Matching is case-insensitive and substring-based, checked in the order
    members, rollcalls, votes. Occurrences of the project name "voteview"
    are ignored: it contains "vote", so a name such as
    ``voteview_export.csv`` would otherwise be misclassified as a votes file
    instead of being reported as undetectable.

    Recognizes patterns like:
    - HSall_members.csv -> MEMBERS
    - HSall_rollcalls.csv -> ROLLCALLS
    - HSall_votes.csv -> VOTES
    - voteview_members.csv -> MEMBERS

    Args:
        filename: File name to inspect.

    Returns:
        The FileType member matching the first keyword found.

    Raises:
        ValueError: If none of the keywords appears in the name.
    """
    # Remove the project name first so its embedded "vote" cannot match.
    searchable = filename.lower().replace("voteview", "")
    if "member" in searchable:
        return FileType.MEMBERS
    if "rollcall" in searchable:
        return FileType.ROLLCALLS
    if "vote" in searchable:
        return FileType.VOTES
    raise ValueError(f"Cannot auto-detect file type from: {filename}")
def main(argv: "list[str] | None" = None) -> int:
    """Main entry point for the CLI.

    Parses the command line, resolves the Voteview file type (explicit
    ``-t/--file-type`` or auto-detected from the source file name), runs the
    conversion and prints a short report.

    Args:
        argv: Command-line arguments without the program name. When None
            (the default) argparse reads them from ``sys.argv[1:]``, so
            existing ``main()`` callers are unaffected.

    Returns:
        0 on success; 1 if the source is missing or not a regular file, the
        file type cannot be detected, or the conversion raises
        VoteviewConversionError. Invalid arguments make argparse exit with
        status 2 before this function returns.
    """
    parser = argparse.ArgumentParser(
        description="Convert Voteview CSV files to Parquet format with validation",
        epilog="""
Examples:
%(prog)s HSall_members.csv members.parquet
%(prog)s HSall_rollcalls.csv rollcalls.parquet
%(prog)s HSall_votes.csv votes.parquet --batch-size 200000
%(prog)s input.csv output.parquet -t votes
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "source",
        type=Path,
        help="Source CSV file path",
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Output Parquet file path",
    )
    parser.add_argument(
        "-t",
        "--file-type",
        choices=["members", "rollcalls", "votes"],
        default=None,
        help="Type of Voteview file (auto-detected from filename if not specified)",
    )
    parser.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip validation (not recommended)",
    )
    parser.add_argument(
        "--sample-size",
        type=int,
        default=None,
        help="Number of rows to sample for validation (uses type default if not specified)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=100_000,
        help="Rows per batch for streaming conversion (default: 100000)",
    )
    args = parser.parse_args(argv)

    # Validate source exists and is a regular file. A directory passes
    # exists(), so it is rejected here with a clear message instead of
    # being handed to the converter.
    if not args.source.exists():
        print(f"ERROR: Source file not found: {args.source}", file=sys.stderr)
        return 1
    if not args.source.is_file():
        print(f"ERROR: Source is not a file: {args.source}", file=sys.stderr)
        return 1

    # Determine file type: an explicit choice wins over auto-detection.
    if args.file_type:
        file_type = FileType(args.file_type)
    else:
        try:
            file_type = detect_file_type(args.source.name)
        except ValueError as e:
            print(f"ERROR: {e}", file=sys.stderr)
            print("Use -t/--file-type to specify explicitly", file=sys.stderr)
            return 1

    print(f"[{datetime.now().isoformat()}] Converting: {args.source.name}")
    print(f" File type: {file_type.value}")

    # Keep the try body to the one call whose failure is handled here.
    try:
        result = convert_voteview_file(
            args.source,
            args.output,
            file_type,
            validate=not args.no_validate,
            sample_size=args.sample_size,
            batch_size=args.batch_size,
        )
    except VoteviewConversionError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    print(f"[{datetime.now().isoformat()}] SUCCESS: {result.row_count:,} rows")
    print(f" Output: {result.output_path}")
    if args.no_validate:
        print(" Validation: SKIPPED")
    else:
        print(" Validation: ALL PASSED")
    return 0
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())