import re from typing import List, Dict, Tuple ROLE_SYNONYMS = { "facility": ["facility", "hospital", "site", "centre", "clinic", "settlement", "community"], "zone": ["zone", "region", "area", "district"], "specialty": ["specialty", "service", "discipline"], "population": ["population", "residents", "members"], "cost": ["cost", "expense", "spend"], "outcome": ["outcome", "improvement", "result"] } def _norm(s: str) -> str: return re.sub(r'[^a-z0-9]+', '', s.lower()) def resolve_role(role: str, columns: List[str]) -> Tuple[str | None, float]: """Return best matching column for semantic role with confidence score.""" target = _norm(role) syns = ROLE_SYNONYMS.get(target, [role]) best, score = None, 0.0 for c in columns: nc = _norm(c) for s in syns: ns = _norm(s) if ns in nc or nc in ns: sc = len(ns) / max(len(nc), 1) if sc > score: best, score = c, sc return best, score