File size: 4,884 Bytes
32bcb86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python3
import random
import re
from typing import Any, Dict, Iterator

# Based on the public Eircode format documentation and the public routing-area list.
# Routing keys were materialized from the Wikipedia routing-area table so generation
# can stay offline and reproducible in this workspace.

# Routing keys (the first three characters of an Eircode) accepted by this
# module. Kept as a list so random.choice() can index into it.
EIRCODE_ROUTING_KEYS = ['A92', 'Y14', 'A84', 'H65', 'N37', 'R14', 'K32', 'F26', 'H53', 'P31', 'F31', 'A75', 'A41', 'F35', 'F56', 'P72', 'P75', 'H14', 'R42', 'A94', 'F52', 'A98', 'V23', 'E21', 'R93', 'A81', 'N41', 'E32', 'P43', 'E25', 'F23', 'F45', 'H12', 'P56', 'F12', 'H71', 'P85', 'H23', 'E91', 'P24', 'H16', 'T12', 'T23', 'P14', 'P32', 'P47', 'T56', 'T34', 'R56', 'A63', 'F94', 'D01', 'D02', 'D03', 'D04', 'D05', 'D06', 'D6W', 'D07', 'D08', 'D09', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D20', 'D22', 'D24', 'A86', 'A91', 'X35', 'A85', 'R45', 'A83', 'V95', 'Y21', 'P61', 'H91', 'A42', 'A96', 'Y25', 'A82', 'R51', 'R95', 'V93', 'X42', 'V35', 'V15', 'P17', 'F92', 'F93', 'V94', 'V31', 'T45', 'N39', 'H62', 'K78', 'K45', 'P12', 'K36', 'P51', 'W23', 'P25', 'P67', 'H18', 'W34', 'R21', 'N91', 'W91', 'C15', 'E45', 'Y34', 'W12', 'V42', 'A45', 'R32', 'A67', 'F42', 'E53', 'K56', 'V14', 'K34', 'P81', 'F91', 'K67', 'E41', 'E34', 'V92', 'H54', 'R35', 'X91', 'F28', 'Y35', 'P36']
# Set view of the same keys, used for membership tests by the validators.
EIRCODE_ROUTING_KEY_SET = set(EIRCODE_ROUTING_KEYS)
# Characters allowed in the 4-character unique identifier: the ten digits plus
# 15 letters (B, G, I, J, L, M, O, Q, S, U and Z are absent).
UNIQUE_IDENTIFIER_CHARS = "0123456789ACDEFHKNPRTVWXY"
UNIQUE_IDENTIFIER_SET = set(UNIQUE_IDENTIFIER_CHARS)
# Runs of whitespace (with the no-break space listed explicitly) that
# normalize() deletes.
SEPARATORS_RE = re.compile(r"[\s\u00A0]+")
# Canonical spacing: routing key (allowed letter + two digits, or the literal
# D6W), exactly one ASCII space, then four identifier characters.
# Case-insensitive. Checks shape only, not membership in the routing-key list.
STRICT_RE = re.compile(r"^(?:[ACDEFHKNPRTVWXY]\d{2}|D6W) [0-9ACDEFHKNPRTVWXY]{4}$", re.IGNORECASE)


def normalize(value: str) -> str:
    """Return *value* in compact form: trimmed, upper-cased, no whitespace.

    Surrounding whitespace is stripped, the text is upper-cased, and every
    run matched by ``SEPARATORS_RE`` is then removed.
    """
    trimmed_upper = value.strip().upper()
    return SEPARATORS_RE.sub("", trimmed_upper)


def _is_word_boundary(text: str, index: int) -> bool:
    """Tell whether position *index* of *text* can delimit a token.

    Positions outside the string (before the start or at/after the end) count
    as boundaries; inside the string, any non-alphanumeric character does.
    """
    inside = 0 <= index < len(text)
    # Short-circuit keeps the lookup from running for out-of-range indexes.
    return not (inside and text[index].isalnum())


def _is_separator(ch: str) -> bool:
    """Tell whether *ch* is a space, no-break space, tab, CR or LF.

    This is a substring test against the separator string, so an empty
    string also yields True.
    """
    separators = " \u00A0\t\r\n"
    return ch in separators


def format_eircode(value: str) -> str:
    """Return *value* in the spaced form ``"KKK IIII"``.

    The input is normalized first (trimmed, upper-cased, whitespace removed).
    Only the length is checked here; the characters are not validated.

    Raises:
        ValueError: if the normalized value is not exactly 7 characters long.
    """
    code = normalize(value)
    if len(code) == 7:
        routing_key, unique_id = code[:3], code[3:]
        return " ".join((routing_key, unique_id))
    raise ValueError("Eircode must normalize to 7 characters")


def is_valid_routing_key(value: str) -> bool:
    """Tell whether the first three normalized characters are a known routing key.

    Anything after the third character is ignored, so both a bare routing key
    and a longer string starting with one are accepted.
    """
    routing_key = normalize(value)[:3]
    return routing_key in EIRCODE_ROUTING_KEY_SET


def is_valid_unique_identifier(value: str) -> bool:
    """Check the unique-identifier part (characters 4-7) of an Eircode.

    *value* is normalized first. Inputs that normalize to fewer than 7
    characters are rejected; for longer inputs only positions 3..6 are
    inspected and anything beyond is ignored.
    """
    code = normalize(value)
    if len(code) >= 7:
        return UNIQUE_IDENTIFIER_SET.issuperset(code[3:7])
    return False


def is_valid_eircode(value: str, strict_spacing: bool = False) -> bool:
    """Validate a full Eircode.

    The normalized value must be 7 characters long, start with a known
    routing key and end with four characters from the identifier alphabet.

    Args:
        value: Candidate Eircode; case and whitespace are normalized.
        strict_spacing: When true, the trimmed, upper-cased input must also
            match ``STRICT_RE`` (a single space between the two parts).
    """
    code = normalize(value)
    well_formed = (
        len(code) == 7
        and code[:3] in EIRCODE_ROUTING_KEY_SET
        and UNIQUE_IDENTIFIER_SET.issuperset(code[3:])
    )
    if not well_formed:
        return False
    if not strict_spacing:
        return True
    # The spacing check runs on the un-compacted input, not on `code`.
    return STRICT_RE.match(value.strip().upper()) is not None


def generate_unique_identifier() -> str:
    """Return four characters drawn at random from the identifier alphabet.

    Each character is picked independently with ``random.choice``.
    """
    alphabet = UNIQUE_IDENTIFIER_CHARS
    picks = [random.choice(alphabet) for _ in range(4)]
    return ''.join(picks)


def generate_eircode(compact: bool = False) -> str:
    """Return a random Eircode built from a known routing key.

    Args:
        compact: When true, return the 7-character form without a space;
            otherwise return the spaced form from ``format_eircode``.
    """
    # The routing key is drawn before the identifier characters.
    routing_key = random.choice(EIRCODE_ROUTING_KEYS)
    code = routing_key + generate_unique_identifier()
    if compact:
        return code
    return format_eircode(code)


def corrupt_eircode(value: str | None = None) -> str:
    """Return a deliberately invalid variant of an Eircode.

    The starting code is *value* when it normalizes to 7 characters, else a
    freshly generated one. One of three corruptions is picked at random:

    * ``routing`` - replace the routing key with one not in the known list;
    * ``suffix``  - overwrite one identifier character with a letter outside
      the identifier alphabet;
    * ``length``  - return a compact string that is one character too short
      or one character too long.

    The first two results are spaced via ``format_eircode``; the ``length``
    results are returned compact, without a space.
    """
    invalid_letters = 'BGIJLMOQSUZ'

    source = value or generate_eircode(compact=True)
    code = normalize(source)
    if len(code) != 7:
        code = normalize(generate_eircode(compact=True))

    strategy = random.choice(['routing', 'suffix', 'length'])

    if strategy == 'routing':
        bogus_key = random.choice(['B12', 'Z99', 'Q1A', 'O00'])
        return format_eircode(bogus_key + code[3:7])

    if strategy == 'suffix':
        # Position is drawn before the replacement letter.
        index = random.randint(3, 6)
        replacement = random.choice(invalid_letters)
        return format_eircode(code[:index] + replacement + code[index + 1:])

    # 'length': drop the last character or append an invalid one.
    if random.random() < 0.5:
        return code[:6]
    return code + random.choice(invalid_letters)


def iter_eircode_candidates(text: str) -> Iterator[Dict[str, Any]]:
    """Scan *text* and yield every substring shaped like an Eircode.

    A candidate must:

    * start on an alphanumeric character preceded by a word boundary;
    * begin with three characters that upper-case to a known routing key;
    * continue, after any number of separator characters (possibly none),
      with four characters from the identifier alphabet;
    * be followed by a word boundary.

    Yields:
        Dicts with ``start`` and ``end`` (slice offsets into *text*), ``text``
        (the raw matched slice) and ``normalized`` (its compact form).
        Matches do not overlap: scanning resumes at the end of each match.
    """
    total = len(text)
    cursor = 0
    while cursor < total:
        starts_token = text[cursor].isalnum() and _is_word_boundary(text, cursor - 1)
        if starts_token and cursor + 3 > total:
            # Too few characters remain for a routing key at any later start.
            break

        match_end = None
        if starts_token and text[cursor : cursor + 3].upper() in EIRCODE_ROUTING_KEY_SET:
            suffix_at = cursor + 3
            while suffix_at < total and _is_separator(text[suffix_at]):
                suffix_at += 1
            candidate_end = suffix_at + 4
            if (
                candidate_end <= total
                and all(
                    ch in UNIQUE_IDENTIFIER_SET
                    for ch in text[suffix_at:candidate_end].upper()
                )
                and _is_word_boundary(text, candidate_end)
            ):
                match_end = candidate_end

        if match_end is None:
            cursor += 1
            continue

        raw = text[cursor:match_end]
        yield {
            "start": cursor,
            "end": match_end,
            "text": raw,
            "normalized": normalize(raw),
        }
        cursor = match_end