Spaces:
Sleeping
Sleeping
| # healthcare_analysis.py | |
| import pandas as pd | |
| import numpy as np | |
| from typing import Dict, List, Any, Optional, Tuple | |
| import logging | |
| # Configure the root logger at import time with INFO as its level. | |
| logging.basicConfig(level=logging.INFO) | |
| # Module-level logger named after this module (__name__). | |
| logger = logging.getLogger(__name__) | |
class HealthcareAnalyzer:
    """Keyword-driven analysis of healthcare facility datasets.

    The analyzer reads tabular datasets from ``data_registry`` and derives
    distribution, capacity, staffing and trend metrics from whichever
    columns match a set of name patterns.

    The registry must provide ``get(name)`` (returning a DataFrame-like
    object or ``None``) and ``find_related_datasets(keywords)`` (returning
    an iterable of mappings that each have a ``"name"`` key).

    Every ``analyze_*`` method walks all requested datasets and writes into
    one flat result dict, so when several datasets provide the same metric
    the value from the dataset processed last wins.
    """

    # Year tokens looked for (as substrings) in column labels to detect
    # time-series columns. They must be listed in chronological order because
    # their position here defines what "earliest" and "latest" mean.
    trend_years: Tuple[str, ...] = ("2020", "2021", "2022", "2023", "2024")

    def __init__(self, data_registry, trend_years: Optional[List[str]] = None):
        """Create an analyzer bound to ``data_registry``.

        Args:
            data_registry: Dataset lookup object (see the class docstring).
            trend_years: Optional chronological list of year tokens that
                replaces the default 2020-2024 set used to detect
                time-series columns.
        """
        self.data_registry = data_registry
        self.analysis_results = {}
        if trend_years is not None:
            # Labels are lower-cased before matching, so tokens are too.
            self.trend_years = tuple(str(year).lower() for year in trend_years)

    def comprehensive_analysis(self, scenario_text: str) -> Dict[str, Any]:
        """Run every analysis whose keywords appear in ``scenario_text``.

        Args:
            scenario_text: Free-text description of the scenario.

        Returns:
            A dict with one entry per triggered analysis (in the fixed order
            facility_distribution, capacity_analysis, resource_allocation,
            trends) followed by ``"recommendations"`` and
            ``"future_integration"``. The same dict is also stored on
            ``self.analysis_results``.
        """
        logger.info("Starting comprehensive healthcare analysis")

        tasks = self._extract_tasks(scenario_text)
        requirements = self._extract_requirements(scenario_text)
        relevant_data = self._identify_relevant_data(scenario_text)

        # A tuple (not a dict) keeps the result key order explicit.
        analyzers = (
            ("facility_distribution", self.analyze_facility_distribution),
            ("capacity_analysis", self.analyze_capacity),
            ("resource_allocation", self.analyze_resource_allocation),
            ("trends", self.analyze_trends),
        )
        results: Dict[str, Any] = {}
        for task_name, analyze in analyzers:
            if task_name in tasks:
                results[task_name] = analyze(relevant_data)

        results["recommendations"] = self.generate_recommendations(results, requirements)
        results["future_integration"] = self.identify_integration_opportunities(results)

        # Keep the latest run available on the instance; the attribute was
        # initialised in __init__ but never filled before.
        self.analysis_results = results

        logger.info("Comprehensive analysis completed")
        return results

    def _extract_tasks(self, scenario_text: str) -> List[str]:
        """Return the analysis task names whose keywords occur in the text.

        Matching is a case-insensitive substring test, so "trend" also
        matches "trends".
        """
        task_keywords = {
            "facility_distribution": ["facility", "distribution", "location", "sites"],
            "capacity_analysis": ["capacity", "beds", "occupancy", "utilization"],
            "resource_allocation": ["resource", "allocation", "staffing", "equipment"],
            "trends": ["trend", "change", "growth", "decline", "pattern"],
        }
        lowered = scenario_text.lower()  # lower-case once, not once per keyword
        return [
            task_type
            for task_type, keywords in task_keywords.items()
            if any(keyword in lowered for keyword in keywords)
        ]

    def _extract_requirements(self, scenario_text: str) -> Dict[str, Any]:
        """Collect scope, period, facility types and metrics named in the text."""
        return {
            "geographic_scope": self._extract_geographic_scope(scenario_text),
            "time_period": self._extract_time_period(scenario_text),
            "facility_types": self._extract_facility_types(scenario_text),
            "metrics_needed": self._extract_metrics(scenario_text),
        }

    def analyze_facility_distribution(self, relevant_data: List[str]) -> Dict[str, Any]:
        """Summarise how facilities are spread by region, type and setting.

        Args:
            relevant_data: Names of datasets to look up in the registry;
                names the registry does not know are skipped.

        Returns:
            A dict that may contain ``geographic_distribution`` and
            ``geographic_inequality`` (Gini coefficient of the per-region
            row counts), ``facility_type_distribution`` and
            ``facility_diversity`` (Shannon index), and
            ``urban_rural_distribution``. A key is present only when a
            matching column was found.
        """
        results: Dict[str, Any] = {}
        for df in self._iter_datasets(relevant_data):
            geo_col = self._find_column(df, ['province', 'state', 'region', 'zone'])
            if geo_col is not None:
                geo_dist = df[geo_col].value_counts().to_dict()
                results["geographic_distribution"] = geo_dist
                results["geographic_inequality"] = self._calculate_gini(list(geo_dist.values()))

            type_col = self._find_column(df, ['type', 'category', 'facility_type'])
            if type_col is not None:
                type_dist = df[type_col].value_counts().to_dict()
                results["facility_type_distribution"] = type_dist
                results["facility_diversity"] = self._calculate_diversity_index(type_dist)

            urban_col = self._find_column(df, ['urban', 'rural', 'location_type'])
            if urban_col is not None:
                results["urban_rural_distribution"] = df[urban_col].value_counts().to_dict()
        return results

    def analyze_capacity(self, relevant_data: List[str]) -> Dict[str, Any]:
        """Compute capacity totals, utilization averages and capacity growth.

        Args:
            relevant_data: Names of datasets to look up in the registry;
                names the registry does not know are skipped.

        Returns:
            A dict that may contain ``total_capacity``, ``capacity_by_type``,
            ``average_utilization``, ``utilization_by_type``,
            ``capacity_trends`` (column label -> column sum, in
            chronological order) and ``capacity_growth_rate`` (percent change
            from the earliest to the latest year column). The growth rate is
            omitted when the earliest total is zero, because the percentage
            is undefined.
        """
        results: Dict[str, Any] = {}
        for df in self._iter_datasets(relevant_data):
            # Looked up once, up front: the utilization breakdown needs it
            # even when the dataset has no capacity column.
            type_col = self._find_column(df, ['type', 'facility_type'])

            capacity_col = self._find_column(df, ['capacity', 'beds', 'current_capacity'])
            if capacity_col is not None:
                results["total_capacity"] = df[capacity_col].sum()
                if type_col is not None:
                    results["capacity_by_type"] = (
                        df.groupby(type_col)[capacity_col].sum().to_dict()
                    )

            utilization_col = self._find_column(df, ['utilization', 'occupancy', 'occupancy_rate'])
            if utilization_col is not None:
                results["average_utilization"] = df[utilization_col].mean()
                if type_col is not None:
                    results["utilization_by_type"] = (
                        df.groupby(type_col)[utilization_col].mean().to_dict()
                    )

            time_cols = self._find_time_columns(df)
            if len(time_cols) >= 2:
                trend_data = {col: df[col].sum() for col in time_cols}
                results["capacity_trends"] = trend_data

                baseline = trend_data[time_cols[0]]
                if baseline != 0:
                    latest_total = trend_data[time_cols[-1]]
                    results["capacity_growth_rate"] = (latest_total - baseline) / baseline * 100
        return results

    def analyze_resource_allocation(self, relevant_data: List[str]) -> Dict[str, Any]:
        """Summarise staffing and equipment figures.

        The registry's DataFrames are treated as read-only: the staff-per-bed
        ratio is computed on a temporary Series instead of being written back
        as a new column.

        Args:
            relevant_data: Names of datasets to look up in the registry;
                names the registry does not know are skipped.

        Returns:
            A dict that may contain ``total_staff``, ``staff_per_bed_ratio``
            (mean of the per-row ratio, ignoring rows with zero beds) and
            ``equipment_summary`` (column label -> column sum for every
            column whose label contains "equipment").
        """
        results: Dict[str, Any] = {}
        for df in self._iter_datasets(relevant_data):
            staff_col = self._find_column(df, ['staff', 'employees', 'fte'])
            if staff_col is not None:
                results["total_staff"] = df[staff_col].sum()

                capacity_col = self._find_column(df, ['capacity', 'beds'])
                if capacity_col is not None:
                    beds = df[capacity_col]
                    # Zero beds become NaN so those rows drop out of the mean
                    # instead of turning it into infinity.
                    staff_per_bed = df[staff_col] / beds.where(beds != 0)
                    results["staff_per_bed_ratio"] = staff_per_bed.mean()

            equipment_cols = [col for col in df.columns if 'equipment' in str(col).lower()]
            if equipment_cols:
                results["equipment_summary"] = {col: df[col].sum() for col in equipment_cols}
        return results

    def analyze_trends(self, relevant_data: List[str]) -> Dict[str, Any]:
        """Compute changes between consecutive year columns.

        Args:
            relevant_data: Names of datasets to look up in the registry;
                names the registry does not know are skipped.

        Returns:
            A dict that contains ``year_over_year_trends`` when at least two
            year columns exist. Each entry is keyed ``"<prev>_to_<curr>"``
            and holds ``absolute_change`` and ``percentage_change``. Pairs
            whose previous total is not positive are skipped.
        """
        results: Dict[str, Any] = {}
        for df in self._iter_datasets(relevant_data):
            time_cols = self._find_time_columns(df)
            if len(time_cols) < 2:
                continue

            trends = {}
            for prev_year, curr_year in zip(time_cols, time_cols[1:]):
                prev_total = df[prev_year].sum()
                curr_total = df[curr_year].sum()
                if prev_total > 0:
                    delta = curr_total - prev_total
                    trends[f"{prev_year}_to_{curr_year}"] = {
                        "absolute_change": delta,
                        "percentage_change": delta / prev_total * 100,
                    }
            results["year_over_year_trends"] = trends
        return results

    def generate_recommendations(self, analysis_results: Dict[str, Any], requirements: Dict[str, Any]) -> List[Dict[str, str]]:
        """Turn analysis metrics into prioritised recommendations.

        Args:
            analysis_results: Output of the ``analyze_*`` methods keyed by
                task name.
            requirements: Extracted scenario requirements (currently unused).

        Returns:
            Recommendation dicts with ``title``, ``description``,
            ``priority`` and ``data_source`` keys, ordered High, Medium, Low.
            Entries with the same priority keep the order in which the rules
            are checked.
        """
        recommendations = []

        capacity = analysis_results.get("capacity_analysis")
        if capacity is not None:
            # Utilization is compared to 0.7 and rendered with a percent
            # format, i.e. it is treated as a fraction rather than 0-100.
            if "average_utilization" in capacity and capacity["average_utilization"] < 0.7:
                recommendations.append({
                    "title": "Optimize Underutilized Capacity",
                    "description": f"Average utilization is {capacity['average_utilization']:.1%}. Consider repurposing underutilized facilities or consolidating services.",
                    "priority": "Medium",
                    "data_source": "Capacity utilization analysis",
                })
            if "capacity_growth_rate" in capacity and capacity["capacity_growth_rate"] < 2:
                recommendations.append({
                    "title": "Expand Capacity Strategically",
                    "description": f"Capacity growth rate is only {capacity['capacity_growth_rate']:.1f}%. Invest in new facilities or expand existing ones to meet demand.",
                    "priority": "High",
                    "data_source": "Capacity trend analysis",
                })

        dist = analysis_results.get("facility_distribution")
        if dist is not None:
            if "geographic_inequality" in dist and dist["geographic_inequality"] > 0.4:
                recommendations.append({
                    "title": "Address Geographic Inequity",
                    "description": f"High geographic inequality (Gini: {dist['geographic_inequality']:.2f}). Consider targeted investments in underserved areas.",
                    "priority": "High",
                    "data_source": "Geographic distribution analysis",
                })

        resources = analysis_results.get("resource_allocation")
        if resources is not None:
            if "staff_per_bed_ratio" in resources and resources["staff_per_bed_ratio"] < 1.5:
                recommendations.append({
                    "title": "Increase Staffing Levels",
                    "description": f"Staff per bed ratio is {resources['staff_per_bed_ratio']:.2f}, which may be insufficient. Consider hiring additional staff.",
                    "priority": "High",
                    "data_source": "Resource allocation analysis",
                })

        # list.sort is stable, so equal priorities keep their append order.
        priority_order = {"High": 0, "Medium": 1, "Low": 2}
        recommendations.sort(key=lambda rec: priority_order.get(rec["priority"], 3))
        return recommendations

    def identify_integration_opportunities(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
        """Return a fixed catalogue of data, AI and metric opportunities.

        Args:
            analysis_results: Accepted for interface symmetry; the returned
                catalogue does not depend on it.

        Returns:
            A dict with ``data_integration``, ``ai_applications`` and
            ``enhanced_metrics`` lists, built fresh on every call.
        """
        return {
            "data_integration": [
                {
                    "opportunity": "Integrate real-time occupancy data",
                    "description": "Combine current facility data with real-time occupancy monitoring systems",
                    "benefit": "Enable dynamic resource allocation and surge planning",
                },
                {
                    "opportunity": "Incorporate demographic data",
                    "description": "Add population demographics and health needs data",
                    "benefit": "Improve demand forecasting and service planning",
                },
            ],
            "ai_applications": [
                {
                    "opportunity": "Predictive capacity modeling",
                    "description": "Use ML to forecast capacity needs based on trends and external factors",
                    "benefit": "Proactive resource planning and reduced wait times",
                },
                {
                    "opportunity": "Optimization algorithms",
                    "description": "Implement AI for staff scheduling and resource allocation",
                    "benefit": "Improved efficiency and reduced operational costs",
                },
            ],
            "enhanced_metrics": [
                {
                    "metric": "Patient flow efficiency",
                    "description": "Measure time from admission to discharge across facilities",
                    "benefit": "Identify bottlenecks and improve patient experience",
                },
                {
                    "metric": "Resource utilization index",
                    "description": "Composite metric combining staff, equipment, and space utilization",
                    "benefit": "Holistic view of operational efficiency",
                },
            ],
        }

    # Helper methods

    def _iter_datasets(self, relevant_data):
        """Yield each registered dataset named in ``relevant_data``.

        Names for which the registry returns ``None`` are skipped.
        """
        for data_name in relevant_data:
            df = self.data_registry.get(data_name)
            if df is not None:
                yield df

    def _find_time_columns(self, df) -> List[Any]:
        """Return the columns whose label contains one of ``trend_years``.

        Columns come back in chronological order (by position of the matched
        token in ``trend_years``); columns for the same year keep their
        original relative order.
        """
        matches = []
        for position, col in enumerate(df.columns):
            label = str(col).lower()  # labels may be ints such as 2021
            for rank, year in enumerate(self.trend_years):
                if year in label:
                    matches.append((rank, position, col))
                    break
        matches.sort(key=lambda match: match[:2])
        return [col for _, _, col in matches]

    def _find_column(self, df, patterns):
        """Return the first column whose label contains any pattern, else None.

        Columns are scanned in DataFrame order, so the order of ``patterns``
        does not express a priority. Matching is a case-insensitive
        substring test, and labels are converted with ``str`` so that
        non-string labels do not raise.
        """
        lowered_patterns = [pattern.lower() for pattern in patterns]
        for col in df.columns:
            label = str(col).lower()
            if any(pattern in label for pattern in lowered_patterns):
                return col
        return None

    def _calculate_gini(self, values):
        """Return the Gini coefficient of ``values``.

        Empty input and input that sums to zero both return 0.0, since there
        is nothing to distribute unequally.
        """
        ordered = np.sort(np.asarray(list(values), dtype=float))
        n = ordered.size
        total = ordered.sum()
        if n == 0 or total == 0:
            return 0.0
        ranks = np.arange(1, n + 1)
        return float(np.sum((2 * ranks - n - 1) * ordered) / (n * total))

    def _calculate_diversity_index(self, distribution):
        """Return the Shannon diversity index (natural log) of a count mapping.

        Returns 0 for an empty or all-zero distribution.
        """
        total = sum(distribution.values())
        if total == 0:
            return 0
        proportions = [count / total for count in distribution.values()]
        # Summing the negated terms avoids returning -0.0 for one category.
        return float(sum(-p * np.log(p) for p in proportions if p > 0))

    def _extract_geographic_scope(self, text):
        """Return "Alberta", "Canada" or "Unknown" from a keyword test."""
        lowered = text.lower()
        if "alberta" in lowered:
            return "Alberta"
        if "canada" in lowered:
            return "Canada"
        return "Unknown"

    def _extract_time_period(self, text):
        """Return "<min>-<max>" of the 20xx years in the text.

        Returns "Unknown" when fewer than two year mentions are found.
        """
        import re

        years = re.findall(r'\b(20\d{2})\b', text)
        if len(years) >= 2:
            # All matches are four digits, so string order is numeric order.
            return f"{min(years)}-{max(years)}"
        return "Unknown"

    def _extract_facility_types(self, text):
        """Return the facility type labels mentioned in the text."""
        lowered = text.lower()
        type_keywords = (
            ("Hospitals", ("hospital",)),
            ("Nursing homes", ("nursing", "long-term")),
            ("Clinics", ("clinic",)),
        )
        return [
            label
            for label, keywords in type_keywords
            if any(keyword in lowered for keyword in keywords)
        ]

    def _extract_metrics(self, text):
        """Return the metric labels whose keyword is mentioned in the text."""
        lowered = text.lower()
        metric_keywords = (
            ("bed", "Bed capacity"),
            ("occupancy", "Occupancy rates"),
            ("staff", "Staffing levels"),
        )
        return [label for keyword, label in metric_keywords if keyword in lowered]

    def _identify_relevant_data(self, text):
        """Return dataset names the registry relates to a fixed keyword list.

        ``text`` is not consulted; the same healthcare keywords are always
        sent to the registry's ``find_related_datasets``.
        """
        keywords = ["facility", "bed", "capacity", "healthcare", "hospital"]
        return [item["name"] for item in self.data_registry.find_related_datasets(keywords)]