# Provenance (from upstream UI scrape): commit 32bcb86 (verified) by temsa —
# "Publish rc7 with spec-driven scanner release"
#!/usr/bin/env python3
import random
import re
from typing import Any, Dict, Iterator
# Based on the public Eircode format documentation and the public routing-area list.
# Routing keys were materialized from the Wikipedia routing-area table so generation
# can stay offline and reproducible in this workspace.
# Published Eircode routing keys (the first three characters of every code),
# including the Dublin districts D01–D24 and the historic 'D6W'.
EIRCODE_ROUTING_KEYS = ['A92', 'Y14', 'A84', 'H65', 'N37', 'R14', 'K32', 'F26', 'H53', 'P31', 'F31', 'A75', 'A41', 'F35', 'F56', 'P72', 'P75', 'H14', 'R42', 'A94', 'F52', 'A98', 'V23', 'E21', 'R93', 'A81', 'N41', 'E32', 'P43', 'E25', 'F23', 'F45', 'H12', 'P56', 'F12', 'H71', 'P85', 'H23', 'E91', 'P24', 'H16', 'T12', 'T23', 'P14', 'P32', 'P47', 'T56', 'T34', 'R56', 'A63', 'F94', 'D01', 'D02', 'D03', 'D04', 'D05', 'D06', 'D6W', 'D07', 'D08', 'D09', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'D16', 'D17', 'D18', 'D20', 'D22', 'D24', 'A86', 'A91', 'X35', 'A85', 'R45', 'A83', 'V95', 'Y21', 'P61', 'H91', 'A42', 'A96', 'Y25', 'A82', 'R51', 'R95', 'V93', 'X42', 'V35', 'V15', 'P17', 'F92', 'F93', 'V94', 'V31', 'T45', 'N39', 'H62', 'K78', 'K45', 'P12', 'K36', 'P51', 'W23', 'P25', 'P67', 'H18', 'W34', 'R21', 'N91', 'W91', 'C15', 'E45', 'Y34', 'W12', 'V42', 'A45', 'R32', 'A67', 'F42', 'E53', 'K56', 'V14', 'K34', 'P81', 'F91', 'K67', 'E41', 'E34', 'V92', 'H54', 'R35', 'X91', 'F28', 'Y35', 'P36']
# Set form of the list above for O(1) membership tests in validation/scanning.
EIRCODE_ROUTING_KEY_SET = set(EIRCODE_ROUTING_KEYS)
# Alphabet allowed in the 4-character unique-identifier portion of a code:
# the ten digits plus a restricted letter set (B, G, I, J, L, M, O, Q, S, U, Z
# are absent from this string).
UNIQUE_IDENTIFIER_CHARS = "0123456789ACDEFHKNPRTVWXY"
UNIQUE_IDENTIFIER_SET = set(UNIQUE_IDENTIFIER_CHARS)
# Runs of whitespace or non-breaking spaces; normalize() deletes these.
SEPARATORS_RE = re.compile(r"[\s\u00A0]+")
# Strict written form: letter + two digits (or literal D6W), one ASCII space,
# then four identifier characters.  Case-insensitive.
STRICT_RE = re.compile(r"^(?:[ACDEFHKNPRTVWXY]\d{2}|D6W) [0-9ACDEFHKNPRTVWXY]{4}$", re.IGNORECASE)
def normalize(value: str) -> str:
    """Collapse *value* to its compact form: trimmed, upper-cased, separators removed."""
    trimmed = value.strip().upper()
    return SEPARATORS_RE.sub("", trimmed)
def _is_word_boundary(text: str, index: int) -> bool:
if index < 0 or index >= len(text):
return True
return not text[index].isalnum()
def _is_separator(ch: str) -> bool:
return ch in " \u00A0\t\r\n"
def format_eircode(value: str) -> str:
    """Render *value* in canonical 'RRR UUUU' form (routing key, space, identifier).

    Raises:
        ValueError: if the normalized input is not exactly 7 characters long.
    """
    compact = normalize(value)
    if len(compact) == 7:
        return " ".join((compact[:3], compact[3:]))
    raise ValueError("Eircode must normalize to 7 characters")
def is_valid_routing_key(value: str) -> bool:
    """Return True when the first three normalized characters form a known routing key."""
    prefix = normalize(value)[:3]
    return prefix in EIRCODE_ROUTING_KEY_SET
def is_valid_unique_identifier(value: str) -> bool:
    """Validate the identifier portion (positions 3-6) of a full Eircode string.

    NOTE: this expects a full code of at least 7 normalized characters; a bare
    4-character identifier on its own returns False.
    """
    compact = normalize(value)
    if len(compact) >= 7:
        return UNIQUE_IDENTIFIER_SET.issuperset(compact[3:7])
    return False
def is_valid_eircode(value: str, strict_spacing: bool = False) -> bool:
    """Return True when *value* is a structurally valid Eircode.

    The normalized form must be 7 characters: a known routing key followed by
    four identifier-alphabet characters.  With ``strict_spacing=True`` the
    original (trimmed, upper-cased) text must additionally match the canonical
    'RRR UUUU' layout with a single ASCII space.
    """
    compact = normalize(value)
    structurally_ok = (
        len(compact) == 7
        and compact[:3] in EIRCODE_ROUTING_KEY_SET
        and UNIQUE_IDENTIFIER_SET.issuperset(compact[3:])
    )
    if not structurally_ok:
        return False
    if not strict_spacing:
        return True
    return bool(STRICT_RE.match(value.strip().upper()))
def generate_unique_identifier() -> str:
    """Return four random characters drawn from the Eircode identifier alphabet."""
    picks = [random.choice(UNIQUE_IDENTIFIER_CHARS) for _ in range(4)]
    return "".join(picks)
def generate_eircode(compact: bool = False) -> str:
    """Generate a random, structurally valid Eircode.

    Returns the compact 7-character form when *compact* is True, otherwise
    the canonical spaced 'RRR UUUU' form.
    """
    routing = random.choice(EIRCODE_ROUTING_KEYS)
    code = routing + generate_unique_identifier()
    if compact:
        return code
    return format_eircode(code)
def corrupt_eircode(value: str | None = None) -> str:
    """Return a plausible-looking but invalid Eircode derived from *value*.

    If *value* is None or does not normalize to 7 characters, a freshly
    generated code is used as the base.  One corruption mode is chosen at
    random: an unknown routing key, an illegal identifier character, or a
    wrong overall length.
    """
    base = normalize(value or generate_eircode(compact=True))
    if len(base) != 7:
        base = normalize(generate_eircode(compact=True))
    mode = random.choice(['routing', 'suffix', 'length'])
    if mode == 'routing':
        # None of these prefixes appear in the routing-key table.
        replacement = random.choice(['B12', 'Z99', 'Q1A', 'O00'])
        return format_eircode(replacement + base[3:7])
    if mode == 'suffix':
        # These letters are excluded from the identifier alphabet.
        target = random.randint(3, 6)
        mutated = base[:target] + random.choice('BGIJLMOQSUZ') + base[target + 1:]
        return format_eircode(mutated)
    # Length mode: either drop the last character or append an illegal one;
    # both results are returned without the canonical space.
    if random.random() < 0.5:
        return base[:6]
    return base + random.choice('BGIJLMOQSUZ')
def iter_eircode_candidates(text: str) -> Iterator[Dict[str, Any]]:
    """Scan *text* and yield a dict for every Eircode-looking span found.

    Each yielded dict contains:
        "start"/"end": slice indices into *text* covering the raw match,
        "text": the raw matched substring (spacing preserved),
        "normalized": the compact upper-case form of the match.

    Matching is case-insensitive, tolerates separator characters between the
    routing key and the identifier, and requires word boundaries on both sides.
    """
    i = 0
    n = len(text)
    while i < n:
        # A candidate must start on an alphanumeric character that is
        # preceded by a non-word character (or the start of the text).
        if not text[i].isalnum() or not _is_word_boundary(text, i - 1):
            i += 1
            continue
        if i + 3 > n:
            break
        prefix = text[i : i + 3].upper()
        if prefix not in EIRCODE_ROUTING_KEY_SET:
            i += 1
            continue
        # Skip any run of separator characters (spaces, tabs, NBSP, newlines)
        # between the routing key and the 4-character identifier.
        j = i + 3
        while j < n and _is_separator(text[j]):
            j += 1
        if j + 4 > n:
            i += 1
            continue
        suffix = text[j : j + 4].upper()
        if not all(ch in UNIQUE_IDENTIFIER_SET for ch in suffix):
            i += 1
            continue
        end = j + 4
        # Reject matches glued to a following alphanumeric character
        # (e.g. the first 7 chars of a longer token).
        if not _is_word_boundary(text, end):
            i += 1
            continue
        raw = text[i:end]
        yield {
            "start": i,
            "end": end,
            "text": raw,
            "normalized": normalize(raw),
        }
        # Resume scanning after the match so overlapping spans are not re-reported.
        i = end