Spaces:
Sleeping
Sleeping
| # schema_mapper.py | |
| from typing import Dict, List, Any, Set | |
| import re | |
# Canonical healthcare concept -> lowercase phrases that signal it in free text.
# The phrases are compared against lowercased scenario text by map_concepts().
HEALTHCARE_CONCEPTS = {
    # --- Facility types ---
    "hospital": [
        "hospital",
        "medical center",
        "health centre",
        "clinic",
    ],
    "nursing_facility": [
        "nursing home",
        "long-term care",
        "residential care",
        "care facility",
    ],
    "ambulatory_care": [
        "ambulatory",
        "outpatient",
        "clinic",
        "surgery center",
    ],
    # --- Capacity metrics ---
    "bed_capacity": [
        "beds",
        "capacity",
        "bed count",
        "staffed beds",
    ],
    "occupancy_rate": [
        "occupancy",
        "utilization",
        "bed occupancy",
    ],
    # --- Geographic terms ---
    "zone": [
        "zone",
        "region",
        "area",
        "district",
    ],
    "province": [
        "province",
        "state",
        "territory",
    ],
    # --- Time periods ---
    "fiscal_year": [
        "fiscal year",
        "fy",
        "financial year",
    ],
    "current_period": [
        "current",
        "2023-24",
        "present",
        "latest",
    ],
    "previous_period": [
        "previous",
        "2022-23",
        "past",
        "last",
    ],
    # --- Healthcare operations ---
    "patient_flow": [
        "patient flow",
        "throughput",
        "patient movement",
    ],
    "resource_allocation": [
        "resource allocation",
        "staffing",
        "resource distribution",
    ],
    "surge_capacity": [
        "surge",
        "overflow",
        "emergency capacity",
    ],
}
class MappingResult:
    """Container for the outcome of a concept-mapping pass.

    A new instance starts with three empty, independent containers that the
    mapping code fills in.
    """

    def __init__(self):
        # concept name -> registry entry the concept was mapped to
        self.resolved = dict()
        # concept name -> candidate mappings when more than one is plausible
        self.ambiguous = dict()
        # concept names that were mentioned but could not be mapped
        self.missing = set()
def _mentions_term(text: str, term: str) -> bool:
    """Return True when *term* occurs in *text* without starting mid-word.

    Plain substring matching makes short synonyms fire inside unrelated words
    ("fy" in "identify", "last" in "elastic"). A term that begins with a
    letter is therefore rejected when the character right before it is an
    ASCII letter. Suffixes are still allowed, so "hospital" keeps matching
    "hospitals". Terms that begin with a non-letter (e.g. "2023-24") keep
    plain substring semantics.

    Both arguments are expected to be lowercase already.
    """
    guard = r"(?<![a-z])" if term[:1].isalpha() else ""
    return re.search(guard + re.escape(term), text) is not None


def map_concepts(scenario_text: str, data_registry) -> MappingResult:
    """Map healthcare concepts mentioned in a scenario to registry entries.

    Args:
        scenario_text: Free-text scenario description. ``None`` or an empty
            string is treated as "nothing mentioned".
        data_registry: Object exposing ``names()`` (an iterable of entry
            names) and whatever ``_map_to_data_registry`` needs.

    Returns:
        A ``MappingResult`` whose ``resolved`` dict holds every mentioned
        concept that could be tied to a registry entry and whose ``missing``
        set holds mentioned concepts that could not. ``ambiguous`` is not
        populated by this function.
    """
    result = MappingResult()
    scenario_lower = (scenario_text or "").lower()

    def first_name_containing(fragment):
        # First registry name containing *fragment*, case-insensitively.
        return next(
            (name for name in data_registry.names() if fragment in name.lower()),
            None,
        )

    # Pass 1: generic synonym lookup for every known concept.
    for concept, synonyms in HEALTHCARE_CONCEPTS.items():
        if not any(_mentions_term(scenario_lower, syn) for syn in synonyms):
            continue
        mapped_to = _map_to_data_registry(concept, data_registry)
        if mapped_to:
            result.resolved[concept] = mapped_to
        else:
            result.missing.add(concept)

    # Pass 2: specific phrases that map straight to a registry name fragment.
    phrase_rules = (
        (
            "facility_distribution",
            ("facility distribution", "facility count", "number of facilities"),
            "facility",
        ),
        (
            "bed_capacity",
            ("bed capacity", "bed count", "staffed beds"),
            "bed",
        ),
    )
    for concept, phrases, fragment in phrase_rules:
        if not any(_mentions_term(scenario_lower, phrase) for phrase in phrases):
            continue
        match = first_name_containing(fragment)
        if match is not None:
            result.resolved[concept] = match
        else:
            result.missing.add(concept)

    # Long-term care is pointed at the facility-distribution concept itself
    # (a concept key, not a registry name), regardless of registry contents.
    ltc_phrases = ("long-term care", "ltc", "nursing capacity")
    if any(_mentions_term(scenario_lower, phrase) for phrase in ltc_phrases):
        result.resolved["long_term_care"] = "facility_distribution"  # Usually in facility data

    return result
| def _map_to_data_registry(concept: str, data_registry) -> Any: | |
| """Helper to map a concept to the data registry.""" | |
| file_names = data_registry.names() | |
| if concept in ["hospital", "facility_distribution", "long_term_care"]: | |
| return next((name for name in file_names if "facility" in name.lower() or "health" in name.lower()), None) | |
| elif concept == "bed_capacity": | |
| return next((name for name in file_names if "bed" in name.lower()), None) | |
| elif concept == "zone": | |
| # Check if any dataframe has a 'zone' column | |
| for name in file_names: | |
| df = data_registry.get(name) | |
| if df is not None and 'zone' in df.columns: | |
| return name | |
| return None | |
| return None | |
def build_phase1_questions(scenario_text: str, registry, mapping: "MappingResult") -> str:
    """Build clarifying questions (Markdown) from a mapping result.

    Args:
        scenario_text: Accepted for interface compatibility; not used here.
        registry: Accepted for interface compatibility; not used here.
        mapping: Object with ``resolved`` (dict), ``ambiguous`` (dict of
            concept -> iterable of options) and ``missing`` (set of concept
            names).

    Returns:
        A "ready" message when more concepts were resolved than are missing
        and nothing is ambiguous, or when there is nothing to ask about.
        Otherwise the questions joined by newlines, with a section for
        missing concepts followed by a section for ambiguous ones.
    """
    ready_message = "**Data Analysis Ready**: Your data appears well-structured. Please provide any additional context about your analysis goals."

    # A mostly-resolved, unambiguous mapping needs no follow-up questions.
    if len(mapping.resolved) > len(mapping.missing) and not mapping.ambiguous:
        return ready_message

    questions = []

    if mapping.missing:
        questions.append("### Missing Information")
        # ``missing`` is a set; sort it so the output is stable between runs.
        for concept in sorted(mapping.missing, key=str):
            if concept == "facility_distribution":
                questions.append("- Do you have data about healthcare facilities and their distribution?")
            elif concept == "bed_capacity":
                questions.append("- Do you have data about hospital bed capacity and changes over time?")
            else:
                questions.append(f"- Can you provide more information about {concept}?")

    if mapping.ambiguous:
        questions.append("### Clarification Needed")
        for concept, options in mapping.ambiguous.items():
            # Stringify options so non-string candidates cannot break join().
            choices = ", ".join(str(option) for option in options)
            questions.append(f"- For '{concept}', did you mean: {choices}?")

    if not questions:
        return ready_message
    return "\n".join(questions)