File size: 1,027 Bytes
7935397
6953f37
 
 
 
 
 
 
 
 
 
7935397
 
 
 
6953f37
 
 
 
 
7935397
6953f37
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re
from typing import List, Dict, Tuple

# Semantic role -> keywords accepted as naming that role in a column header.
# Keys are lowercase alphanumeric so they can be looked up with a normalized
# role name; each list also contains the role's own name.
ROLE_SYNONYMS: Dict[str, List[str]] = {
    "facility": [
        "facility",
        "hospital",
        "site",
        "centre",
        "clinic",
        "settlement",
        "community",
    ],
    "zone": [
        "zone",
        "region",
        "area",
        "district",
    ],
    "specialty": [
        "specialty",
        "service",
        "discipline",
    ],
    "population": [
        "population",
        "residents",
        "members",
    ],
    "cost": [
        "cost",
        "expense",
        "spend",
    ],
    "outcome": [
        "outcome",
        "improvement",
        "result",
    ],
}

def _norm(s: str) -> str:
    """Lowercase *s* and drop every character outside ``a-z`` and ``0-9``."""
    lowered = s.lower()
    # Splitting on each run of disallowed characters and gluing the remaining
    # pieces back together removes those runs from the string.
    kept_pieces = re.split(r'[^a-z0-9]+', lowered)
    return "".join(kept_pieces)

def resolve_role(role: str, columns: List[str]) -> Tuple[str | None, float]:
    """Return the best-matching column for a semantic role, with a confidence.

    The role name is normalized and looked up in ``ROLE_SYNONYMS``; a role
    that is not listed there is matched against its own name. A column is a
    candidate when its normalized name contains a normalized synonym, or is
    itself contained in one.

    Args:
        role: Semantic role name, e.g. ``"facility"``.
        columns: Candidate column names, in priority order for ties.

    Returns:
        ``(column, confidence)`` for the highest-scoring candidate, where
        ``column`` is the original (un-normalized) name and the earliest
        column wins ties. ``confidence`` is the length of the shorter
        normalized string divided by the longer one, so it lies in
        ``(0, 1]`` and is ``1.0`` only for an exact normalized match.
        ``(None, 0.0)`` when no column matches.
    """
    target = _norm(role)
    syns = ROLE_SYNONYMS.get(target, [role])
    # Normalize the synonyms once instead of once per column. An empty
    # synonym could only ever score 0, so it is dropped up front.
    norm_syns = [ns for ns in map(_norm, syns) if ns]

    best, score = None, 0.0
    for c in columns:
        nc = _norm(c)
        if not nc:
            # '' is a substring of every string, so a column whose name
            # normalizes to nothing would "match" every synonym.
            continue
        for ns in norm_syns:
            if ns in nc or nc in ns:
                # Symmetric ratio: whichever string is contained in the
                # other, the score stays <= 1, so a short column that merely
                # occurs inside a synonym cannot outrank an exact match.
                shorter, longer = sorted((len(ns), len(nc)))
                sc = shorter / longer
                if sc > score:
                    best, score = c, sc
    return best, score