| """Command-line interface for Indian Address Parser."""
|
|
|
| import argparse
|
| import json
|
| import sys
|
| from pathlib import Path
|
|
|
|
|
def _build_arg_parser():
    """Create the argparse parser that defines the CLI's arguments and options."""
    parser = argparse.ArgumentParser(
        description="Parse Indian addresses using NER",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Parse single address
  address-parser "PLOT NO752 FIRST FLOOR, NEW DELHI, 110041"

  # Parse from file
  address-parser --input addresses.txt --output parsed.json

  # Use trained model
  address-parser --model ./models/address_ner_v3 "H.NO. 123, LAJPAT NAGAR"
""",
    )

    parser.add_argument(
        "address",
        nargs="?",
        help="Address to parse (or use --input for file)",
    )
    parser.add_argument(
        "--input", "-i",
        help="Input file with addresses (one per line)",
    )
    parser.add_argument(
        "--output", "-o",
        help="Output JSON file",
    )
    parser.add_argument(
        "--model", "-m",
        help="Path to trained model directory",
    )
    parser.add_argument(
        "--format", "-f",
        choices=["json", "table", "simple"],
        default="json",
        help="Output format (default: json)",
    )
    parser.add_argument(
        "--version", "-v",
        action="version",
        version="indian-address-parser 2.0.0",
    )
    return parser


def _collect_addresses(args, parser):
    """Return the addresses to parse, taken from --input or the positional argument.

    When --input is given it wins over the positional address; blank lines in
    the file are skipped and surrounding whitespace is stripped. An unreadable
    or undecodable input file is reported through ``parser.error`` (exit
    status 2). With neither source given, the help text is printed and the
    process exits with status 1.
    """
    if args.input:
        try:
            with open(args.input, encoding="utf-8") as handle:
                stripped = [line.strip() for line in handle]
        except (OSError, UnicodeDecodeError) as exc:
            # parser.error prints a usage message and raises SystemExit(2).
            parser.error(f"cannot read input file {args.input!r}: {exc}")
        return [line for line in stripped if line]

    if args.address:
        return [args.address]

    parser.print_help()
    sys.exit(1)


def _load_address_parser(model_path):
    """Return an AddressParser: a pretrained one if model_path exists, else rules-only."""
    # Imported here rather than at call sites that run earlier, so that
    # --help/--version and argument errors never need the parsing backend.
    from address_parser import AddressParser

    if model_path and Path(model_path).exists():
        print(f"Loading model from {model_path}...", file=sys.stderr)
        return AddressParser.from_pretrained(model_path)

    if model_path:
        # The fallback is kept, but a missing model path is now made visible.
        print(
            f"Warning: model path {model_path!r} does not exist",
            file=sys.stderr,
        )
    print("Using rules-only mode", file=sys.stderr)
    return AddressParser.rules_only()


def _print_table(results):
    """Print one entity table (label, value, confidence) per parsed address."""
    rule = "=" * 60
    for number, result in enumerate(results, start=1):
        raw = result.raw_address
        # Only mark the address as shortened when it was actually cut.
        shown = raw if len(raw) <= 50 else f"{raw[:50]}..."
        print(f"\n{rule}")
        print(f"Address {number}: {shown}")
        print(rule)
        print(f"{'Entity':<15} {'Value':<40} {'Conf':<6}")
        print("-" * 60)
        for entity in result.entities:
            print(f"{entity.label:<15} {entity.value:<40} {entity.confidence:.0%}")


def _print_simple(results):
    """Print each parsed address as one ' | '-separated line of its non-empty fields."""
    # (display label, attribute on the result) in output order.
    fields = (
        ("House", "house_number"),
        ("Floor", "floor"),
        ("Block", "block"),
        ("Gali", "gali"),
        ("Colony", "colony"),
        ("Area", "area"),
        ("PIN", "pincode"),
        ("City", "city"),
    )
    for result in results:
        parts = []
        for label, attr in fields:
            value = getattr(result, attr)
            if value:
                parts.append(f"{label}: {value}")
        print(" | ".join(parts) if parts else "No entities found")


def main(argv: "list[str] | None" = None) -> None:
    """Main CLI entry point.

    Parses the command line, gathers the addresses (from --input or the
    positional argument), runs each through an AddressParser and prints the
    results in the requested format.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is the
            behavior of calling ``main()`` with no arguments.

    Raises:
        SystemExit: status 1 when no address source is given (after printing
            help); status 2 on argument errors or an unreadable --input file;
            status 0 for --help/--version.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    # Resolve the input before loading any model so that usage errors are
    # reported immediately instead of after backend/model initialization.
    addresses = _collect_addresses(args, parser)

    if args.output and args.format != "json":
        # Only the JSON branch writes to --output; say so instead of
        # ignoring the option silently.
        print(
            f"Warning: --output is only used with --format json; "
            f"not writing {args.output}",
            file=sys.stderr,
        )

    address_parser = _load_address_parser(args.model)
    results = [address_parser.parse(addr) for addr in addresses]

    if args.format == "json":
        output = [r.model_dump() for r in results]
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        json_str = json.dumps(output, indent=2, ensure_ascii=False)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(json_str)
            print(f"Saved to {args.output}", file=sys.stderr)
        else:
            print(json_str)
    elif args.format == "table":
        _print_table(results)
    else:
        _print_simple(results)
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|