Rajan Sharma committed on
Commit
6953f37
·
verified ·
1 Parent(s): 492569d

Update column_resolver.py

Browse files
Files changed (1) hide show
  1. column_resolver.py +23 -12
column_resolver.py CHANGED
@@ -1,18 +1,29 @@
1
  import re
2
- from typing import List, Dict
 
 
 
 
 
 
 
 
 
3
 
4
  def _norm(s: str) -> str:
5
  return re.sub(r'[^a-z0-9]+', '', s.lower())
6
 
7
- def build_alias_map(columns: List[str]) -> Dict[str, str]:
8
- m = {}
 
 
 
9
  for c in columns:
10
- n = _norm(c)
11
- m[n] = c
12
- if n.endswith('s') and n[:-1] not in m: m[n[:-1]] = c
13
- if not n.endswith('s') and n+'s' not in m: m[n+'s'] = c
14
- return m
15
-
16
- def resolve_cols(requested: List[str], available: List[str]) -> List[str]:
17
- alias = build_alias_map(available)
18
- return [alias.get(_norm(r), r) for r in requested]
 
1
  import re
2
+ from typing import List, Dict, Tuple
3
+
# Words accepted in a column header for each semantic role.  resolve_role()
# looks a role up here by its normalised name; a role with no entry is
# matched against its own name instead.
ROLE_SYNONYMS: Dict[str, List[str]] = {
    "facility": ["facility", "hospital", "site", "centre", "clinic", "settlement", "community"],
    "zone": ["zone", "region", "area", "district"],
    "specialty": ["specialty", "service", "discipline"],
    "population": ["population", "residents", "members"],
    "cost": ["cost", "expense", "spend"],
    "outcome": ["outcome", "improvement", "result"],
}
12
 
13
  def _norm(s: str) -> str:
14
  return re.sub(r'[^a-z0-9]+', '', s.lower())
15
 
def resolve_role(role: str, columns: List[str]) -> Tuple[str | None, float]:
    """Return the column that best matches a semantic role, with a confidence.

    The role is normalised with ``_norm`` and looked up in ``ROLE_SYNONYMS``;
    a role with no entry is matched against its own name.  A column is a
    candidate when its normalised name contains a normalised synonym or is
    contained in one.  The confidence is the length of the shorter string
    divided by the length of the longer one, so it lies in (0, 1] and is 1.0
    only when the normalised column name equals the normalised synonym.

    Args:
        role: Semantic role to resolve, e.g. ``"facility"``.
        columns: Candidate column names.

    Returns:
        ``(column, confidence)`` for the highest-scoring column; on a tie the
        column that appears first in *columns* wins.  ``(None, 0.0)`` when no
        column matches.
    """
    target = _norm(role)
    # Normalise the synonyms once rather than once per column, and drop any
    # that normalise to nothing (they could never produce a positive score).
    needles = [n for n in map(_norm, ROLE_SYNONYMS.get(target, [role])) if n]
    best, score = None, 0.0
    for c in columns:
        nc = _norm(c)
        if not nc:
            # "" is a substring of every string, so a blank or symbol-only
            # column name would otherwise match every synonym.
            continue
        for ns in needles:
            if ns in nc or nc in ns:
                # Shorter over longer keeps the score within (0, 1] no matter
                # which of the two strings is the substring.
                sc = min(len(ns), len(nc)) / max(len(ns), len(nc))
                if sc > score:
                    best, score = c, sc
    return best, score