File size: 4,080 Bytes
47bc13b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""Command-line interface for Indian Address Parser."""

import argparse
import json
import sys
from pathlib import Path


def main() -> None:
    """Run the Indian Address Parser command-line interface.

    Arguments are read from ``sys.argv``. Addresses come either from the
    ``--input`` file (one per line, blank lines skipped) or from the single
    positional ``address``; when both are given the file takes precedence.
    Each address is parsed with an ``AddressParser`` and the results are
    printed in the format selected by ``--format``:

    * ``json``   - list of ``model_dump()`` dicts, written to ``--output``
      when given, otherwise to stdout.
    * ``table``  - one block per address listing every entity with its
      label, value and confidence.
    * ``simple`` - one ``" | "``-joined line of the populated fields per
      address.

    Status messages and warnings go to stderr so stdout carries only results.

    Raises:
        SystemExit: with status 1 (after printing help) when neither an
            address nor ``--input`` is supplied; argparse itself also exits
            on ``--version`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Parse Indian addresses using NER",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""

Examples:

  # Parse single address

  address-parser "PLOT NO752 FIRST FLOOR, NEW DELHI, 110041"



  # Parse from file

  address-parser --input addresses.txt --output parsed.json



  # Use trained model

  address-parser --model ./models/address_ner_v3 "H.NO. 123, LAJPAT NAGAR"

        """,
    )

    parser.add_argument(
        "address",
        nargs="?",
        help="Address to parse (or use --input for file)",
    )
    parser.add_argument(
        "--input", "-i",
        help="Input file with addresses (one per line)",
    )
    parser.add_argument(
        "--output", "-o",
        help="Output JSON file",
    )
    parser.add_argument(
        "--model", "-m",
        help="Path to trained model directory",
    )
    parser.add_argument(
        "--format", "-f",
        choices=["json", "table", "simple"],
        default="json",
        help="Output format (default: json)",
    )
    parser.add_argument(
        "--version", "-v",
        action="version",
        version="indian-address-parser 2.0.0",
    )

    args = parser.parse_args()

    # Collect the addresses first: a missing argument or unreadable input file
    # should fail fast, before paying for the parser import and model load.
    if args.input:
        with open(args.input, encoding="utf-8") as f:
            addresses = [line.strip() for line in f if line.strip()]
    elif args.address:
        addresses = [args.address]
    else:
        parser.print_help()
        sys.exit(1)

    # Import here to avoid slow startup
    from address_parser import AddressParser

    # Load parser
    if args.model and Path(args.model).exists():
        print(f"Loading model from {args.model}...", file=sys.stderr)
        address_parser = AddressParser.from_pretrained(args.model)
    else:
        if args.model:
            # Keep the best-effort fallback, but do not hide a mistyped path.
            print(
                f"Warning: model path {args.model} does not exist",
                file=sys.stderr,
            )
        print("Using rules-only mode", file=sys.stderr)
        address_parser = AddressParser.rules_only()

    # Parse addresses
    results = [address_parser.parse(addr) for addr in addresses]

    # --output is only honoured for JSON; say so instead of ignoring it silently.
    if args.output and args.format != "json":
        print(
            f"Warning: --output is ignored for format '{args.format}'",
            file=sys.stderr,
        )

    # Output
    if args.format == "json":
        output = [r.model_dump() for r in results]
        json_str = json.dumps(output, indent=2, ensure_ascii=False)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(json_str)
            print(f"Saved to {args.output}", file=sys.stderr)
        else:
            print(json_str)

    elif args.format == "table":
        for index, result in enumerate(results, start=1):
            raw = result.raw_address
            # Only mark the header as truncated when it really was cut.
            preview = raw if len(raw) <= 50 else f"{raw[:50]}..."
            print(f"\n{'='*60}")
            print(f"Address {index}: {preview}")
            print(f"{'='*60}")
            print(f"{'Entity':<15} {'Value':<40} {'Conf':<6}")
            print("-" * 60)
            for entity in result.entities:
                print(f"{entity.label:<15} {entity.value:<40} {entity.confidence:.0%}")

    else:  # simple
        # (display label, result attribute) in the order they are printed.
        simple_fields = (
            ("House", "house_number"),
            ("Floor", "floor"),
            ("Block", "block"),
            ("Gali", "gali"),
            ("Colony", "colony"),
            ("Area", "area"),
            ("PIN", "pincode"),
            ("City", "city"),
        )
        for result in results:
            parts = []
            for label, attr in simple_fields:
                value = getattr(result, attr)
                if value:
                    parts.append(f"{label}: {value}")

            print(" | ".join(parts) if parts else "No entities found")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()