# Source: Medica_DecisionSupportAI / schema_mapper.py
# Last change: "Update schema_mapper.py" by Rajan Sharma (commit e324bb0, verified)
# (Scraped repository-page header: raw | history | blame | 5.7 kB)
# schema_mapper.py
from typing import Dict, List, Any, Set
import re
# Vocabulary used to recognise healthcare concepts in free-text scenarios.
# Each key is a canonical concept name; its value lists the lowercase phrases
# that count as a mention of that concept. Concepts are checked in the order
# they are declared here.
# NOTE: "clinic" is listed under both "hospital" and "ambulatory_care", so a
# scenario mentioning a clinic triggers both concepts.
HEALTHCARE_CONCEPTS: Dict[str, List[str]] = {
    # --- Facility types ---
    "hospital": ["hospital", "medical center", "health centre", "clinic"],
    "nursing_facility": [
        "nursing home",
        "long-term care",
        "residential care",
        "care facility",
    ],
    "ambulatory_care": ["ambulatory", "outpatient", "clinic", "surgery center"],
    # --- Capacity metrics ---
    "bed_capacity": ["beds", "capacity", "bed count", "staffed beds"],
    "occupancy_rate": ["occupancy", "utilization", "bed occupancy"],
    # --- Geographic terms ---
    "zone": ["zone", "region", "area", "district"],
    "province": ["province", "state", "territory"],
    # --- Time periods ---
    "fiscal_year": ["fiscal year", "fy", "financial year"],
    "current_period": ["current", "2023-24", "present", "latest"],
    "previous_period": ["previous", "2022-23", "past", "last"],
    # --- Healthcare operations ---
    "patient_flow": ["patient flow", "throughput", "patient movement"],
    "resource_allocation": [
        "resource allocation",
        "staffing",
        "resource distribution",
    ],
    "surge_capacity": ["surge", "overflow", "emergency capacity"],
}
class MappingResult:
    """Outcome of matching scenario concepts against the data registry.

    Attributes:
        resolved: concept name -> the registry entry it was mapped to.
        ambiguous: concept name -> candidate options to ask the user about.
        missing: concept names that were mentioned but could not be mapped.
    """

    def __init__(self):
        # Every instance gets its own fresh containers, so results never
        # share state with one another.
        self.resolved: Dict[str, Any] = {}  # Successfully mapped concepts
        self.ambiguous: Dict[str, List[str]] = {}  # Concepts with multiple possible mappings
        self.missing: Set[str] = set()  # Concepts that couldn't be mapped

    def __repr__(self) -> str:
        # `missing` is a set; sort it so the representation is stable
        # between runs and usable in logs and test output.
        return (
            f"{type(self).__name__}("
            f"resolved={self.resolved!r}, "
            f"ambiguous={self.ambiguous!r}, "
            f"missing={sorted(self.missing, key=str)!r})"
        )
def _mentions_any(text: str, terms) -> bool:
    """Return True if any of *terms* occurs in *text* beginning at a word start.

    *text* is expected to be lowercased already. A plain substring test is too
    loose for short terms: "fy" would be found inside "identify" and "last"
    inside "plastic". The look-behind only accepts hits that are not preceded
    by a letter or digit; suffixes are still allowed, so "hospital" keeps
    matching "hospitals" and "fy" keeps matching "fy2024".
    """
    alternatives = "|".join(re.escape(term) for term in terms if term)
    if not alternatives:
        return False
    return re.search(rf"(?<![a-z0-9])(?:{alternatives})", text) is not None


def _first_name_containing(names, fragment: str):
    """Return the first name containing *fragment* (case-insensitive), or None."""
    return next((name for name in names if fragment in name.lower()), None)


def map_concepts(scenario_text: str, data_registry) -> MappingResult:
    """Map healthcare concepts from scenario text to data registry.

    Args:
        scenario_text: Free-text scenario description. ``None`` or an empty
            string is treated as a scenario that mentions nothing.
        data_registry: Object exposing ``names()`` (registry entry names) and
            whatever ``_map_to_data_registry`` needs from it.

    Returns:
        A ``MappingResult`` whose ``resolved`` maps each mentioned concept to
        a registry entry and whose ``missing`` holds mentioned concepts for
        which no entry was found.
    """
    result = MappingResult()
    scenario_lower = (scenario_text or "").lower()

    # Pass 1: vocabulary-driven detection of the known concepts.
    for concept, synonyms in HEALTHCARE_CONCEPTS.items():
        if not _mentions_any(scenario_lower, synonyms):
            continue
        mapped_to = _map_to_data_registry(concept, data_registry)
        if mapped_to:
            result.resolved[concept] = mapped_to
        else:
            result.missing.add(concept)

    # Pass 2: phrase patterns resolved by a fragment of the registry name.
    # Each row is (concept, trigger phrases, fragment looked for in names).
    name_based_checks = (
        (
            "facility_distribution",
            ("facility distribution", "facility count", "number of facilities"),
            "facility",
        ),
        (
            "bed_capacity",
            ("bed capacity", "bed count", "staffed beds"),
            "bed",
        ),
    )
    for concept, phrases, fragment in name_based_checks:
        if not _mentions_any(scenario_lower, phrases):
            continue
        # The registry is only consulted once a phrase actually matched.
        match = _first_name_containing(data_registry.names(), fragment)
        if match is not None:
            result.resolved[concept] = match
        else:
            result.missing.add(concept)

    # Long-term care is assumed to live in the facility data.
    # NOTE(review): unlike the other entries, the value stored here is a
    # concept label rather than a registry name - confirm consumers expect it.
    if _mentions_any(scenario_lower, ("long-term care", "ltc", "nursing capacity")):
        result.resolved["long_term_care"] = "facility_distribution"

    return result
def _map_to_data_registry(concept: str, data_registry) -> Any:
    """Helper to map a concept to the data registry.

    Args:
        concept: Canonical concept name (e.g. ``"hospital"``, ``"zone"``).
        data_registry: Object exposing ``names()`` and ``get(name)``.

    Returns:
        The name of the first registry entry that matches the concept, or
        ``None`` when nothing matches or the concept has no mapping rule.
    """
    file_names = list(data_registry.names())

    if concept in ("hospital", "facility_distribution", "long_term_care"):
        # Facility-like concepts are recognised by the entry name.
        return next(
            (
                name
                for name in file_names
                if "facility" in name.lower() or "health" in name.lower()
            ),
            None,
        )

    if concept == "bed_capacity":
        return next((name for name in file_names if "bed" in name.lower()), None)

    if concept == "zone":
        # Zones are recognised by a 'zone' column inside the entry.
        for name in file_names:
            entry = data_registry.get(name)
            columns = getattr(entry, "columns", None)
            if columns is None:
                # Entry is absent or is not a table; it cannot hold a column.
                continue
            # Exact membership keeps the original behaviour; the normalised
            # comparison also accepts headers such as "Zone" or " ZONE ",
            # in line with the case-insensitive name checks above.
            if "zone" in columns or any(
                isinstance(column, str) and column.strip().lower() == "zone"
                for column in columns
            ):
                return name

    return None
def build_phase1_questions(scenario_text: str, registry, mapping: "MappingResult") -> str:
    """Build clarifying questions based on mapping results.

    Args:
        scenario_text: Scenario description (not used by the current logic).
        registry: Data registry (not used by the current logic).
        mapping: Result object exposing ``resolved``, ``missing`` and
            ``ambiguous``.

    Returns:
        A Markdown string: either a "ready" notice, or the questions for
        missing and ambiguous concepts, one per line.
    """
    ready_message = (
        "**Data Analysis Ready**: Your data appears well-structured. "
        "Please provide any additional context about your analysis goals."
    )

    # With more concepts resolved than missing and nothing ambiguous, the
    # mapping is considered good enough to skip the questions.
    if len(mapping.resolved) > len(mapping.missing) and not mapping.ambiguous:
        return ready_message

    # Concepts that have a dedicated question; the rest get a generic one.
    specific_questions = {
        "facility_distribution": "- Do you have data about healthcare facilities and their distribution?",
        "bed_capacity": "- Do you have data about hospital bed capacity and changes over time?",
    }

    questions: List[str] = []

    if mapping.missing:
        questions.append("### Missing Information")
        # `missing` is a set: sort it so the questions come out in the same
        # order on every run instead of following hash order.
        for concept in sorted(mapping.missing, key=str):
            questions.append(
                specific_questions.get(
                    concept, f"- Can you provide more information about {concept}?"
                )
            )

    if mapping.ambiguous:
        questions.append("### Clarification Needed")
        for concept, options in mapping.ambiguous.items():
            # str() keeps the join from failing on non-string options.
            choices = ", ".join(str(option) for option in options)
            questions.append(f"- For '{concept}', did you mean: {choices}?")

    if not questions:
        return ready_message
    return "\n".join(questions)