File size: 5,697 Bytes
e324bb0
 
49f10c8
 
e324bb0
 
 
 
 
 
13953b9
 
e324bb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49f10c8
 
 
e324bb0
 
 
 
49f10c8
e324bb0
 
49f10c8
13953b9
 
e324bb0
c2ce9a8
e324bb0
 
 
 
13953b9
e324bb0
 
 
 
 
c2ce9a8
e324bb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2ce9a8
 
 
e324bb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13953b9
 
e324bb0
 
 
 
 
 
 
 
 
 
 
 
 
 
13953b9
e324bb0
 
 
 
 
13953b9
 
e324bb0
13953b9
e324bb0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# schema_mapper.py
from typing import Dict, List, Any, Set
import re

# Canonical healthcare concepts and the lowercase phrases that signal them.
# Keys are concept identifiers; each value lists the synonyms that are looked
# for in the lowercased scenario text. Insertion order is the order in which
# concepts are checked.
HEALTHCARE_CONCEPTS: Dict[str, List[str]] = {
    # --- Facility types ---
    "hospital": [
        "hospital",
        "medical center",
        "health centre",
        "clinic",
    ],
    "nursing_facility": [
        "nursing home",
        "long-term care",
        "residential care",
        "care facility",
    ],
    "ambulatory_care": [
        "ambulatory",
        "outpatient",
        "clinic",
        "surgery center",
    ],

    # --- Capacity metrics ---
    "bed_capacity": [
        "beds",
        "capacity",
        "bed count",
        "staffed beds",
    ],
    "occupancy_rate": [
        "occupancy",
        "utilization",
        "bed occupancy",
    ],

    # --- Geographic terms ---
    "zone": [
        "zone",
        "region",
        "area",
        "district",
    ],
    "province": [
        "province",
        "state",
        "territory",
    ],

    # --- Time periods ---
    "fiscal_year": [
        "fiscal year",
        "fy",
        "financial year",
    ],
    "current_period": [
        "current",
        "2023-24",
        "present",
        "latest",
    ],
    "previous_period": [
        "previous",
        "2022-23",
        "past",
        "last",
    ],

    # --- Healthcare operations ---
    "patient_flow": [
        "patient flow",
        "throughput",
        "patient movement",
    ],
    "resource_allocation": [
        "resource allocation",
        "staffing",
        "resource distribution",
    ],
    "surge_capacity": [
        "surge",
        "overflow",
        "emergency capacity",
    ],
}

class MappingResult:
    """Container for the outcome of a concept-mapping pass.

    Every instance starts empty and owns its own containers, so results from
    separate mapping runs never share state.
    """

    def __init__(self):
        # concept -> whatever the mapper resolved it to
        self.resolved: Dict[str, Any] = dict()
        # concept -> candidate options when more than one mapping is plausible
        self.ambiguous: Dict[str, Any] = dict()
        # concepts for which no mapping could be found
        self.missing: Set[str] = set()

def _mentions(text: str, term: str) -> bool:
    """Return True when *term* occurs in *text* as a whole word or phrase."""
    # Words inside a phrase may be separated by any run of whitespace
    # (double spaces, line breaks).
    phrase = r"\s+".join(re.escape(word) for word in term.split())
    # ``[^\W\d_]`` means "any letter". Refusing a letter directly before the
    # term and directly after it (while allowing a plural "s"/"es") keeps
    # "surge" from matching "surgery" and "fy" from matching "identify",
    # yet "hospitals" and "fy2024" still count as mentions.
    pattern = rf"(?<![^\W\d_]){phrase}(?:e?s)?(?![^\W\d_])"
    return re.search(pattern, text) is not None


def _first_name_containing(data_registry, fragment: str):
    """Return the first registry name containing *fragment*, ignoring case, or None."""
    return next(
        (name for name in data_registry.names() if fragment in name.lower()),
        None,
    )


def map_concepts(scenario_text: str, data_registry) -> MappingResult:
    """Map healthcare concepts mentioned in a scenario onto the data registry.

    The scenario is lowercased and searched for every synonym listed in
    ``HEALTHCARE_CONCEPTS``. Synonyms are matched as whole words or phrases,
    so a synonym embedded in a longer word does not count as a mention.
    Each mentioned concept is handed to ``_map_to_data_registry``; a truthy
    answer is recorded in ``resolved``, anything else lands in ``missing``.
    A few multi-word patterns are then checked directly against the
    registry's dataset names.

    Args:
        scenario_text: Free-text scenario description. ``None`` is treated
            as an empty scenario.
        data_registry: Object exposing ``names()``, an iterable of dataset
            names; it is also passed on to ``_map_to_data_registry``.

    Returns:
        A ``MappingResult`` with ``resolved`` and ``missing`` filled in.
        ``ambiguous`` is left empty by this function.
    """
    result = MappingResult()
    scenario_lower = (scenario_text or "").lower()

    # Generic pass: one registry lookup per concept that the scenario mentions.
    for concept, synonyms in HEALTHCARE_CONCEPTS.items():
        if not any(_mentions(scenario_lower, synonym) for synonym in synonyms):
            continue
        mapped_to = _map_to_data_registry(concept, data_registry)
        if mapped_to:
            result.resolved[concept] = mapped_to
        else:
            result.missing.add(concept)

    # Pattern pass: (concept, trigger phrases, fragment expected in a dataset name).
    dataset_patterns = (
        (
            "facility_distribution",
            ("facility distribution", "facility count", "number of facilities"),
            "facility",
        ),
        (
            "bed_capacity",
            ("bed capacity", "bed count", "staffed beds"),
            "bed",
        ),
    )
    for concept, phrases, fragment in dataset_patterns:
        if not any(_mentions(scenario_lower, phrase) for phrase in phrases):
            continue
        dataset = _first_name_containing(data_registry, fragment)
        if dataset is not None:
            result.resolved[concept] = dataset
        else:
            result.missing.add(concept)

    # Long-term care is assumed to live in the facility data.
    # NOTE(review): unlike the entries above, this stores the literal
    # "facility_distribution" rather than a registry dataset name and never
    # checks that such data exists - confirm that downstream consumers
    # expect this before changing it.
    long_term_care_phrases = ("long-term care", "ltc", "nursing capacity")
    if any(_mentions(scenario_lower, phrase) for phrase in long_term_care_phrases):
        result.resolved["long_term_care"] = "facility_distribution"

    return result

def _map_to_data_registry(concept: str, data_registry) -> Any:
    """Find the registry entry that backs a concept, if there is one.

    Facility-like concepts ("hospital", "facility_distribution",
    "long_term_care") resolve to the first name containing "facility" or
    "health"; "bed_capacity" resolves to the first name containing "bed".
    Both comparisons ignore case. "zone" resolves to the first entry whose
    stored object is not None and has 'zone' among its ``columns``.

    Returns:
        The matching registry name, or None when nothing matches or the
        concept is not one of those handled here.
    """
    candidates = data_registry.names()

    if concept == "zone":
        # The zone lookup inspects the stored object, not the entry name.
        for candidate in candidates:
            frame = data_registry.get(candidate)
            if frame is None:
                continue
            if 'zone' in frame.columns:
                return candidate
        return None

    # All remaining concepts are matched on fragments of the entry name.
    if concept == "bed_capacity":
        keywords = ("bed",)
    elif concept in ("hospital", "facility_distribution", "long_term_care"):
        keywords = ("facility", "health")
    else:
        return None

    for candidate in candidates:
        lowered = candidate.lower()
        if any(keyword in lowered for keyword in keywords):
            return candidate
    return None

def build_phase1_questions(scenario_text: str, registry, mapping: "MappingResult") -> str:
    """Build clarifying questions based on mapping results.

    When more concepts were resolved than are missing and nothing is
    ambiguous, a short "ready" message is returned instead of questions.
    Otherwise the result is Markdown text with a "Missing Information"
    section (one bullet per missing concept) and/or a "Clarification Needed"
    section (one bullet per ambiguous concept).

    Missing concepts are listed in sorted order so that the same mapping
    always produces the same text; iterating the set directly would let the
    order vary from run to run.

    Args:
        scenario_text: Not used by this function.
        registry: Not used by this function.
        mapping: Object exposing ``resolved``, ``missing`` and ``ambiguous``
            (a mapping of concept to an iterable of options).

    Returns:
        The questions joined by newlines, or the ready message when there
        is nothing to ask.
    """
    ready_message = "**Data Analysis Ready**: Your data appears well-structured. Please provide any additional context about your analysis goals."

    # A mostly-resolved, unambiguous mapping needs no follow-up questions.
    if len(mapping.resolved) > len(mapping.missing) and not mapping.ambiguous:
        return ready_message

    # Concepts with a dedicated question; all others get a generic prompt.
    specific_questions = {
        "facility_distribution": "- Do you have data about healthcare facilities and their distribution?",
        "bed_capacity": "- Do you have data about hospital bed capacity and changes over time?",
    }

    questions = []

    if mapping.missing:
        questions.append("### Missing Information")
        for concept in sorted(mapping.missing):
            questions.append(
                specific_questions.get(
                    concept,
                    f"- Can you provide more information about {concept}?",
                )
            )

    if mapping.ambiguous:
        questions.append("### Clarification Needed")
        for concept, options in mapping.ambiguous.items():
            choices = ", ".join(str(option) for option in options)
            questions.append(f"- For '{concept}', did you mean: {choices}?")

    if not questions:
        return ready_message

    return "\n".join(questions)