# Medica_DecisionSupportAI / column_resolver.py
# Rajan Sharma
# Update column_resolver.py
# 6953f37 verified
import re
from typing import List, Dict, Tuple
# Semantic role -> keywords that may appear inside a column name for that role.
# resolve_role() looks a role up here and compares each keyword against the
# normalized column names.
ROLE_SYNONYMS: Dict[str, List[str]] = {
    "facility": [
        "facility",
        "hospital",
        "site",
        "centre",
        "clinic",
        "settlement",
        "community",
    ],
    "zone": [
        "zone",
        "region",
        "area",
        "district",
    ],
    "specialty": [
        "specialty",
        "service",
        "discipline",
    ],
    "population": [
        "population",
        "residents",
        "members",
    ],
    "cost": [
        "cost",
        "expense",
        "spend",
    ],
    "outcome": [
        "outcome",
        "improvement",
        "result",
    ],
}
def _norm(s: str) -> str:
return re.sub(r'[^a-z0-9]+', '', s.lower())
def resolve_role(role: str, columns: List[str]) -> Tuple[str | None, float]:
    """Return the best matching column for a semantic role with a confidence score.

    The role is normalized with ``_norm`` and looked up in ``ROLE_SYNONYMS``;
    a role that is not listed there is matched on its own name. A column is a
    candidate when its normalized name contains a normalized synonym, or is
    itself contained in one.

    Args:
        role: Semantic role to resolve (e.g. ``"facility"``).
        columns: Candidate column names, in priority order for ties.

    Returns:
        ``(column, score)`` where ``column`` is the original (un-normalized)
        name of the best candidate and ``score`` is the length of the shorter
        normalized string divided by the longer one, so it lies in ``(0, 1]``
        and is ``1.0`` only for an exact normalized match. Returns
        ``(None, 0.0)`` when nothing matches. On equal scores the earliest
        column wins.
    """
    target = _norm(role)
    syns = ROLE_SYNONYMS.get(target, [role])
    # Normalize the synonyms once instead of once per column. An empty
    # normalized synonym is a substring of everything but could only ever
    # score 0, so it is dropped up front.
    norm_syns = [ns for ns in map(_norm, syns) if ns]

    best, score = None, 0.0
    for c in columns:
        nc = _norm(c)
        if not nc:
            # A blank or symbol-only header normalizes to "", which is "in"
            # every synonym; it must not count as a match.
            continue
        for ns in norm_syns:
            if ns in nc or nc in ns:
                # Shorter / longer keeps the score within [0, 1] whichever
                # string contains the other, so a short column name that
                # happens to sit inside a synonym cannot outrank an exact match.
                sc = min(len(ns), len(nc)) / max(len(ns), len(nc))
                if sc > score:
                    best, score = c, sc
    return best, score