Spaces:
Sleeping
Sleeping
| import re | |
| from typing import List, Dict, Tuple | |
# Maps each semantic role to the keywords accepted as evidence for it in a
# column name. resolve_role() looks roles up here by their _norm()-ed form,
# which is why every key is plain lowercase alphanumerics.
ROLE_SYNONYMS: Dict[str, List[str]] = {
    "facility": [
        "facility",
        "hospital",
        "site",
        "centre",
        "clinic",
        "settlement",
        "community",
    ],
    "zone": [
        "zone",
        "region",
        "area",
        "district",
    ],
    "specialty": [
        "specialty",
        "service",
        "discipline",
    ],
    "population": [
        "population",
        "residents",
        "members",
    ],
    "cost": [
        "cost",
        "expense",
        "spend",
    ],
    "outcome": [
        "outcome",
        "improvement",
        "result",
    ],
}
def _norm(s: str) -> str:
    """Return ``s`` lowercased with every character outside a-z / 0-9 removed.

    Spaces, punctuation, underscores and non-ASCII letters are all dropped,
    so ``"Health Zone"`` and ``"health_zone"`` both become ``"healthzone"``.
    """
    lowered = s.lower()
    non_alnum = re.compile(r'[^a-z0-9]+')
    return non_alnum.sub('', lowered)
def resolve_role(role: str, columns: List[str]) -> Tuple[str | None, float]:
    """Return the best-matching column for a semantic role, with a confidence score.

    The role is normalized with ``_norm`` and looked up in ``ROLE_SYNONYMS``;
    a role with no entry there is matched against its own name. A column is a
    candidate when its normalized name contains a normalized synonym, or is
    itself contained in one.

    Args:
        role: Semantic role name, e.g. ``"facility"``.
        columns: Candidate column names to search.

    Returns:
        ``(column, score)``. ``score`` is the length of the shorter of the two
        normalized strings divided by the longer, so it lies in ``(0, 1]`` and
        is ``1.0`` only for an exact normalized match. Returns ``(None, 0.0)``
        when no column matches. On equal scores the earliest column wins.
    """
    target = _norm(role)
    # Normalize the synonyms once instead of once per column. Synonyms that
    # normalize to "" are dropped: "" is a substring of every string, and such
    # a synonym could only ever produce a score of 0.
    syns = [ns for ns in map(_norm, ROLE_SYNONYMS.get(target, [role])) if ns]

    best, score = None, 0.0
    for c in columns:
        nc = _norm(c)
        if not nc:
            # A blank or punctuation-only column name normalizes to "", which
            # would satisfy `nc in ns` for every synonym and win spuriously.
            continue
        for ns in syns:
            if ns in nc or nc in ns:
                # shorter / longer keeps the score <= 1 in both containment
                # directions; dividing by len(nc) alone let a short fragment
                # of a synonym (e.g. "a" in "area") outrank an exact match.
                sc = min(len(ns), len(nc)) / max(len(ns), len(nc))
                if sc > score:
                    best, score = c, sc
    return best, score