| | |
| | import random |
| | import re |
| | from typing import Any, Dict, Iterator |
| |
|
| | |
| | |
| | |
| |
|
# Routing keys (the first three characters of an Eircode) accepted by this
# module. Kept as a list so it can be sampled with random.choice; the set
# below is used for membership tests.
EIRCODE_ROUTING_KEYS = [
    "A92", "Y14", "A84", "H65", "N37", "R14", "K32", "F26", "H53", "P31",
    "F31", "A75", "A41", "F35", "F56", "P72", "P75", "H14", "R42", "A94",
    "F52", "A98", "V23", "E21", "R93", "A81", "N41", "E32", "P43", "E25",
    "F23", "F45", "H12", "P56", "F12", "H71", "P85", "H23", "E91", "P24",
    "H16", "T12", "T23", "P14", "P32", "P47", "T56", "T34", "R56", "A63",
    "F94", "D01", "D02", "D03", "D04", "D05", "D06", "D6W", "D07", "D08",
    "D09", "D10", "D11", "D12", "D13", "D14", "D15", "D16", "D17", "D18",
    "D20", "D22", "D24", "A86", "A91", "X35", "A85", "R45", "A83", "V95",
    "Y21", "P61", "H91", "A42", "A96", "Y25", "A82", "R51", "R95", "V93",
    "X42", "V35", "V15", "P17", "F92", "F93", "V94", "V31", "T45", "N39",
    "H62", "K78", "K45", "P12", "K36", "P51", "W23", "P25", "P67", "H18",
    "W34", "R21", "N91", "W91", "C15", "E45", "Y34", "W12", "V42", "A45",
    "R32", "A67", "F42", "E53", "K56", "V14", "K34", "P81", "F91", "K67",
    "E41", "E34", "V92", "H54", "R35", "X91", "F28", "Y35", "P36",
]
# Same keys as a set for O(1) membership checks.
EIRCODE_ROUTING_KEY_SET = set(EIRCODE_ROUTING_KEYS)
# Characters allowed in the four-character unique identifier: the ten digits
# followed by the permitted letters. Kept as a string so it can be sampled
# with random.choice.
UNIQUE_IDENTIFIER_CHARS = (
    "0123456789"
    "ACDEFHKNPRTVWXY"
)
# Same characters as a set for fast membership checks.
UNIQUE_IDENTIFIER_SET = set(UNIQUE_IDENTIFIER_CHARS)
| | SEPARATORS_RE = re.compile(r"[\s\u00A0]+") |
| | STRICT_RE = re.compile(r"^(?:[ACDEFHKNPRTVWXY]\d{2}|D6W) [0-9ACDEFHKNPRTVWXY]{4}$", re.IGNORECASE) |
| |
|
| |
|
def normalize(value: str) -> str:
    """Return *value* upper-cased with all separator runs removed.

    Surrounding whitespace is stripped first; every run matched by
    ``SEPARATORS_RE`` in what remains is then deleted.
    """
    trimmed = value.strip()
    uppercased = trimmed.upper()
    return SEPARATORS_RE.sub("", uppercased)
| |
|
| |
|
def _is_word_boundary(text: str, index: int) -> bool:
    """Tell whether position *index* of *text* acts as a word boundary.

    Positions outside the string (negative or past the end) count as
    boundaries; inside the string a position is a boundary when the character
    there is not alphanumeric.
    """
    inside = 0 <= index < len(text)
    return not (inside and text[index].isalnum())
| |
|
| |
|
def _is_separator(ch: str) -> bool:
    """Tell whether *ch* is one of the separator characters.

    The accepted characters are space, no-break space, tab, carriage return
    and line feed.
    """
    separators = " \u00A0\t\r\n"
    # str.find has the same substring semantics as the ``in`` operator.
    return separators.find(ch) != -1
| |
|
| |
|
def format_eircode(value: str) -> str:
    """Return *value* in ``XXX XXXX`` display form.

    The input is normalized first. Only the length is checked here; the
    characters themselves are not validated.

    Raises:
        ValueError: if the normalized value is not exactly 7 characters long.
    """
    compact = normalize(value)
    if len(compact) == 7:
        routing_key, identifier = compact[:3], compact[3:]
        return " ".join((routing_key, identifier))
    raise ValueError("Eircode must normalize to 7 characters")
| |
|
| |
|
def is_valid_routing_key(value: str) -> bool:
    """Tell whether the first three normalized characters are a known routing key.

    Anything after the third character of the normalized value is ignored.
    """
    routing_key = normalize(value)[:3]
    return routing_key in EIRCODE_ROUTING_KEY_SET
| |
|
| |
|
def is_valid_unique_identifier(value: str) -> bool:
    """Tell whether characters 4-7 of the normalized value are allowed identifier characters.

    *value* is a whole Eircode, not just the identifier part. It must
    normalize to at least 7 characters; anything past the seventh character
    is ignored, and the routing key is not checked.
    """
    compact = normalize(value)
    if len(compact) >= 7:
        identifier = compact[3:7]
        return UNIQUE_IDENTIFIER_SET.issuperset(identifier)
    return False
| |
|
| |
|
def is_valid_eircode(value: str, strict_spacing: bool = False) -> bool:
    """Tell whether *value* is a valid Eircode.

    The normalized value must be exactly 7 characters, start with a known
    routing key and end with four allowed identifier characters.

    Args:
        value: Candidate Eircode; case and whitespace are normalized away.
        strict_spacing: When true, the stripped, upper-cased input must also
            match ``STRICT_RE``.
    """
    compact = normalize(value)
    routing_key, identifier = compact[:3], compact[3:]
    well_formed = (
        len(compact) == 7
        and routing_key in EIRCODE_ROUTING_KEY_SET
        and UNIQUE_IDENTIFIER_SET.issuperset(identifier)
    )
    if not well_formed:
        return False
    if not strict_spacing:
        return True
    return STRICT_RE.match(value.strip().upper()) is not None
| |
|
| |
|
def generate_unique_identifier() -> str:
    """Return four characters drawn at random from ``UNIQUE_IDENTIFIER_CHARS``."""
    picks = [random.choice(UNIQUE_IDENTIFIER_CHARS) for _ in range(4)]
    return "".join(picks)
| |
|
| |
|
def generate_eircode(compact: bool = False) -> str:
    """Return a random Eircode built from a known routing key.

    Args:
        compact: When true, return the 7-character form without a space;
            otherwise return the result of ``format_eircode``.
    """
    routing_key = random.choice(EIRCODE_ROUTING_KEYS)
    code = routing_key + generate_unique_identifier()
    if compact:
        return code
    return format_eircode(code)
| |
|
| |
|
def corrupt_eircode(value: str | None = None) -> str:
    """Return a deliberately broken variant of an Eircode.

    If *value* is falsy, or does not normalize to 7 characters, a freshly
    generated Eircode is used as the starting point. One of three corruption
    modes is then picked at random:

    * ``routing`` - the routing key is replaced with one from a fixed list of
      bad prefixes; the result is returned in ``XXX XXXX`` form.
    * ``suffix`` - one identifier character is replaced with a letter from the
      bad-character list; the result is returned in ``XXX XXXX`` form.
    * ``length`` - the compact value is either cut to 6 characters or extended
      to 8 with a bad letter; the result is returned without a space.
    """
    invalid_chars = 'BGIJLMOQSUZ'

    seed = value or generate_eircode(compact=True)
    compact = normalize(seed)
    if len(compact) != 7:
        compact = normalize(generate_eircode(compact=True))

    mode = random.choice(['routing', 'suffix', 'length'])

    if mode == 'routing':
        prefix = random.choice(['B12', 'Z99', 'Q1A', 'O00'])
        return format_eircode(prefix + compact[3:7])

    if mode == 'suffix':
        # Position first, replacement second: same draw order as before.
        pos = random.randint(3, 6)
        replacement = random.choice(invalid_chars)
        return format_eircode(compact[:pos] + replacement + compact[pos + 1:])

    # 'length' mode: too short or too long, chosen with equal probability.
    return compact[:6] if random.random() < 0.5 else compact + random.choice(invalid_chars)
| |
|
| |
|
def iter_eircode_candidates(text: str) -> Iterator[Dict[str, Any]]:
    """Scan *text* and yield every substring that looks like an Eircode.

    A candidate must:

    * start at a word boundary (start of text or after a non-alphanumeric
      character);
    * begin with three characters that upper-case to a known routing key;
    * continue, after any number of separator characters (possibly none),
      with four characters that upper-case to allowed identifier characters;
    * end at a word boundary (end of text or before a non-alphanumeric
      character).

    Matches do not overlap: after a hit, scanning resumes right after it.

    Yields:
        Dicts with the keys ``start`` and ``end`` (slice offsets into *text*),
        ``text`` (the raw matched substring) and ``normalized`` (the compact
        upper-case form).
    """
    i = 0
    n = len(text)
    while i < n:
        # A candidate can only start on an alphanumeric character that is not
        # the continuation of a longer word.
        if not text[i].isalnum() or not _is_word_boundary(text, i - 1):
            i += 1
            continue
        # Fewer than three characters left: no routing key can fit any more.
        if i + 3 > n:
            break
        prefix = text[i : i + 3].upper()
        if prefix not in EIRCODE_ROUTING_KEY_SET:
            i += 1
            continue
        # Skip the optional separators between routing key and identifier.
        j = i + 3
        while j < n and _is_separator(text[j]):
            j += 1
        if j + 4 > n:
            i += 1
            continue
        suffix = text[j : j + 4].upper()
        # str.upper() may expand one character into several (U+FB00 becomes
        # "FF"), so the upper-cased slice must still be exactly four
        # characters; otherwise the normalized value would not be 7 long.
        if len(suffix) != 4 or not all(ch in UNIQUE_IDENTIFIER_SET for ch in suffix):
            i += 1
            continue
        end = j + 4
        # Reject matches that run straight into further letters or digits.
        if not _is_word_boundary(text, end):
            i += 1
            continue
        raw = text[i:end]
        yield {
            "start": i,
            "end": end,
            "text": raw,
            "normalized": normalize(raw),
        }
        i = end
| |
|