#!/usr/bin/env python3 import random import re from typing import Any, Dict, Iterator # Based on the public Eircode format documentation and the public routing-area list. # Routing keys were materialized from the Wikipedia routing-area table so generation # can stay offline and reproducible in this workspace. EIRCODE_ROUTING_KEYS = ['A92', 'Y14', 'A84', 'H65', 'N37', 'R14', 'K32', 'F26', 'H53', 'P31', 'F31', 'A75', 'A41', 'F35', 'F56', 'P72', 'P75', 'H14', 'R42', 'A94', 'F52', 'A98', 'V23', 'E21', 'R93', 'A81', 'N41', 'E32', 'P43', 'E25', 'F23', 'F45', 'H12', 'P56', 'F12', 'H71', 'P85', 'H23', 'E91', 'P24', 'H16', 'T12', 'T23', 'P14', 'P32', 'P47', 'T56', 'T34', 'R56', 'A63', 'F94', 'D01', 'D02', 'D03', 'D04', 'D05', 'D06', 'D6W', 'D07', 'D08', 'D09', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D20', 'D22', 'D24', 'A86', 'A91', 'X35', 'A85', 'R45', 'A83', 'V95', 'Y21', 'P61', 'H91', 'A42', 'A96', 'Y25', 'A82', 'R51', 'R95', 'V93', 'X42', 'V35', 'V15', 'P17', 'F92', 'F93', 'V94', 'V31', 'T45', 'N39', 'H62', 'K78', 'K45', 'P12', 'K36', 'P51', 'W23', 'P25', 'P67', 'H18', 'W34', 'R21', 'N91', 'W91', 'C15', 'E45', 'Y34', 'W12', 'V42', 'A45', 'R32', 'A67', 'F42', 'E53', 'K56', 'V14', 'K34', 'P81', 'F91', 'K67', 'E41', 'E34', 'V92', 'H54', 'R35', 'X91', 'F28', 'Y35', 'P36'] EIRCODE_ROUTING_KEY_SET = set(EIRCODE_ROUTING_KEYS) UNIQUE_IDENTIFIER_CHARS = "0123456789ACDEFHKNPRTVWXY" UNIQUE_IDENTIFIER_SET = set(UNIQUE_IDENTIFIER_CHARS) SEPARATORS_RE = re.compile(r"[\s\u00A0]+") STRICT_RE = re.compile(r"^(?:[ACDEFHKNPRTVWXY]\d{2}|D6W) [0-9ACDEFHKNPRTVWXY]{4}$", re.IGNORECASE) def normalize(value: str) -> str: return SEPARATORS_RE.sub("", value.strip().upper()) def _is_word_boundary(text: str, index: int) -> bool: if index < 0 or index >= len(text): return True return not text[index].isalnum() def _is_separator(ch: str) -> bool: return ch in " \u00A0\t\r\n" def format_eircode(value: str) -> str: compact = normalize(value) if len(compact) != 7: raise ValueError("Eircode must normalize to 7 characters") return f"{compact[:3]} {compact[3:]}" def is_valid_routing_key(value: str) -> bool: return normalize(value)[:3] in EIRCODE_ROUTING_KEY_SET def is_valid_unique_identifier(value: str) -> bool: compact = normalize(value) if len(compact) < 7: return False return all(ch in UNIQUE_IDENTIFIER_SET for ch in compact[3:7]) def is_valid_eircode(value: str, strict_spacing: bool = False) -> bool: compact = normalize(value) if len(compact) != 7: return False if compact[:3] not in EIRCODE_ROUTING_KEY_SET: return False if not all(ch in UNIQUE_IDENTIFIER_SET for ch in compact[3:]): return False if strict_spacing: return STRICT_RE.match(value.strip().upper()) is not None return True def generate_unique_identifier() -> str: return ''.join(random.choice(UNIQUE_IDENTIFIER_CHARS) for _ in range(4)) def generate_eircode(compact: bool = False) -> str: value = random.choice(EIRCODE_ROUTING_KEYS) + generate_unique_identifier() return value if compact else format_eircode(value) def corrupt_eircode(value: str | None = None) -> str: compact = normalize(value or generate_eircode(compact=True)) if len(compact) != 7: compact = normalize(generate_eircode(compact=True)) mode = random.choice(['routing', 'suffix', 'length']) if mode == 'routing': bad_prefixes = ['B12', 'Z99', 'Q1A', 'O00'] return format_eircode(random.choice(bad_prefixes) + compact[3:7]) if mode == 'suffix': bad_chars = 'BGIJLMOQSUZ' pos = random.randint(3, 6) chars = list(compact) chars[pos] = random.choice(bad_chars) return format_eircode(''.join(chars)) if random.random() < 0.5: return compact[:6] return compact + random.choice('BGIJLMOQSUZ') def iter_eircode_candidates(text: str) -> Iterator[Dict[str, Any]]: i = 0 n = len(text) while i < n: if not text[i].isalnum() or not _is_word_boundary(text, i - 1): i += 1 continue if i + 3 > n: break prefix = text[i : i + 3].upper() if prefix not in EIRCODE_ROUTING_KEY_SET: i += 1 continue j = i + 3 while j < n and _is_separator(text[j]): j += 1 if j + 4 > n: i += 1 continue suffix = text[j : j + 4].upper() if not all(ch in UNIQUE_IDENTIFIER_SET for ch in suffix): i += 1 continue end = j + 4 if not _is_word_boundary(text, end): i += 1 continue raw = text[i:end] yield { "start": i, "end": end, "text": raw, "normalized": normalize(raw), } i = end