Spaces:
Sleeping
Sleeping
File size: 4,080 Bytes
47bc13b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
"""Command-line interface for Indian Address Parser."""
import argparse
import json
import sys
from pathlib import Path
# Usage examples shown verbatim below the generated --help text.
_EPILOG = """
Examples:
  # Parse single address
  address-parser "PLOT NO752 FIRST FLOOR, NEW DELHI, 110041"
  # Parse from file
  address-parser --input addresses.txt --output parsed.json
  # Use trained model
  address-parser --model ./models/address_ner_v3 "H.NO. 123, LAJPAT NAGAR"
"""

# (display label, result attribute) pairs, in the order the "simple" format
# prints them.
_SIMPLE_FIELDS = (
    ("House", "house_number"),
    ("Floor", "floor"),
    ("Block", "block"),
    ("Gali", "gali"),
    ("Colony", "colony"),
    ("Area", "area"),
    ("PIN", "pincode"),
    ("City", "city"),
)

# Maximum number of raw-address characters shown in a table header.
_PREVIEW_WIDTH = 50


def _build_arg_parser():
    """Build the argparse parser that describes the CLI options."""
    parser = argparse.ArgumentParser(
        description="Parse Indian addresses using NER",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOG,
    )
    parser.add_argument(
        "address",
        nargs="?",
        help="Address to parse (or use --input for file)",
    )
    parser.add_argument(
        "--input", "-i",
        help="Input file with addresses (one per line)",
    )
    parser.add_argument(
        "--output", "-o",
        help="Output JSON file",
    )
    parser.add_argument(
        "--model", "-m",
        help="Path to trained model directory",
    )
    parser.add_argument(
        "--format", "-f",
        choices=["json", "table", "simple"],
        default="json",
        help="Output format (default: json)",
    )
    parser.add_argument(
        "--version", "-v",
        action="version",
        version="indian-address-parser 2.0.0",
    )
    return parser


def _read_addresses(args, parser):
    """Return the addresses to parse; exit through *parser* if there are none.

    ``--input`` takes precedence over the positional address. Blank lines in
    the input file are dropped and surrounding whitespace is stripped.
    """
    if args.input:
        try:
            with open(args.input, encoding="utf-8") as handle:
                stripped = [line.strip() for line in handle]
        except (OSError, UnicodeDecodeError) as exc:
            # Report as a usage error (exit status 2) instead of a traceback.
            parser.error(f"cannot read input file {args.input!r}: {exc}")
        return [line for line in stripped if line]
    if args.address:
        return [args.address]
    parser.print_help()
    sys.exit(1)


def _load_address_parser(model_path):
    """Create the parser backend: the trained model if present, else rules."""
    # Import here to avoid slow startup
    from address_parser import AddressParser

    if model_path and Path(model_path).exists():
        print(f"Loading model from {model_path}...", file=sys.stderr)
        return AddressParser.from_pretrained(model_path)
    if model_path:
        # Keep the best-effort fallback, but do not drop the option silently.
        print(f"Warning: model path {model_path} not found", file=sys.stderr)
    print("Using rules-only mode", file=sys.stderr)
    return AddressParser.rules_only()


def _emit_json(results, output_path):
    """Serialise *results* as JSON to *output_path*, or to stdout if unset."""
    payload = [result.model_dump() for result in results]
    json_str = json.dumps(payload, indent=2, ensure_ascii=False)
    if output_path:
        with open(output_path, "w", encoding="utf-8") as handle:
            handle.write(json_str)
        print(f"Saved to {output_path}", file=sys.stderr)
    else:
        print(json_str)


def _print_table(results):
    """Print one entity table per parsed address to stdout."""
    rule = "=" * 60
    for index, result in enumerate(results, start=1):
        raw = result.raw_address
        # Only mark the preview with an ellipsis when text was actually cut.
        if len(raw) > _PREVIEW_WIDTH:
            preview = raw[:_PREVIEW_WIDTH] + "..."
        else:
            preview = raw
        print(f"\n{rule}")
        print(f"Address {index}: {preview}")
        print(rule)
        print(f"{'Entity':<15} {'Value':<40} {'Conf':<6}")
        print("-" * 60)
        for entity in result.entities:
            print(f"{entity.label:<15} {entity.value:<40} {entity.confidence:.0%}")


def _print_simple(results):
    """Print one ``Label: value | ...`` summary line per parsed address."""
    for result in results:
        parts = []
        for label, attr in _SIMPLE_FIELDS:
            value = getattr(result, attr)
            if value:
                parts.append(f"{label}: {value}")
        print(" | ".join(parts) if parts else "No entities found")


def main(argv=None):
    """Main CLI entry point.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``None``, in which case argparse reads
            ``sys.argv[1:]``; pass a list to drive the CLI programmatically.

    Raises:
        SystemExit: With status 1 when neither an address nor ``--input`` is
            given (after printing help), with status 2 on argument errors or
            an unreadable input file, and with status 0 for ``--version`` /
            ``--help``.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    # Resolve the input before loading the parser backend so that usage
    # errors are reported immediately, without paying for the import/model.
    addresses = _read_addresses(args, parser)

    address_parser = _load_address_parser(args.model)
    results = [address_parser.parse(addr) for addr in addresses]

    if args.format == "json":
        _emit_json(results, args.output)
        return

    if args.output:
        # --output is only implemented for JSON; tell the user rather than
        # silently discarding the option.
        print(
            f"Warning: --output is ignored for format '{args.format}'",
            file=sys.stderr,
        )
    if args.format == "table":
        _print_table(results)
    else:  # simple
        _print_simple(results)
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
|