Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update healthcare_analysis.py
Browse files- healthcare_analysis.py +211 -67
healthcare_analysis.py
CHANGED
|
@@ -11,11 +11,15 @@ class HealthcareAnalyzer:
|
|
| 11 |
def __init__(self, data_registry):
|
| 12 |
self.data_registry = data_registry
|
| 13 |
self.analysis_results = {}
|
|
|
|
| 14 |
|
| 15 |
def comprehensive_analysis(self, scenario_text: str) -> Dict[str, Any]:
|
| 16 |
"""Perform comprehensive healthcare scenario analysis"""
|
| 17 |
logger.info("Starting comprehensive healthcare analysis")
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
# Extract tasks and requirements
|
| 20 |
tasks = self._extract_tasks(scenario_text)
|
| 21 |
requirements = self._extract_requirements(scenario_text)
|
|
@@ -27,10 +31,10 @@ class HealthcareAnalyzer:
|
|
| 27 |
results = {}
|
| 28 |
|
| 29 |
if "facility_distribution" in tasks:
|
| 30 |
-
results["facility_distribution"] = self.analyze_facility_distribution(relevant_data)
|
| 31 |
|
| 32 |
if "capacity_analysis" in tasks:
|
| 33 |
-
results["capacity_analysis"] = self.analyze_capacity(relevant_data)
|
| 34 |
|
| 35 |
if "resource_allocation" in tasks:
|
| 36 |
results["resource_allocation"] = self.analyze_resource_allocation(relevant_data)
|
|
@@ -69,66 +73,90 @@ class HealthcareAnalyzer:
|
|
| 69 |
"geographic_scope": self._extract_geographic_scope(scenario_text),
|
| 70 |
"time_period": self._extract_time_period(scenario_text),
|
| 71 |
"facility_types": self._extract_facility_types(scenario_text),
|
| 72 |
-
"metrics_needed": self._extract_metrics(scenario_text)
|
|
|
|
| 73 |
}
|
| 74 |
|
| 75 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
"""Enhanced facility distribution analysis"""
|
| 77 |
results = {}
|
|
|
|
|
|
|
| 78 |
|
| 79 |
for data_name in relevant_data:
|
| 80 |
df = self.data_registry.get(data_name)
|
| 81 |
if df is None or df.empty:
|
| 82 |
continue
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
if geo_col:
|
| 87 |
-
# Ensure we're working with string data
|
| 88 |
-
df[geo_col] = df[geo_col].astype(str)
|
| 89 |
-
alberta_mask = df[geo_col].str.lower().isin(['alberta', 'ab'])
|
| 90 |
-
ab_facilities = df[alberta_mask].copy()
|
| 91 |
-
|
| 92 |
-
if not ab_facilities.empty:
|
| 93 |
-
geo_dist = ab_facilities[geo_col].value_counts().to_dict()
|
| 94 |
-
results["geographic_distribution"] = geo_dist
|
| 95 |
-
|
| 96 |
-
# Calculate Gini coefficient for inequality
|
| 97 |
-
gini = self._calculate_gini(list(geo_dist.values()))
|
| 98 |
-
results["geographic_inequality"] = gini
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
# Facility type distribution
|
| 101 |
-
type_col = self._find_column(
|
| 102 |
if type_col:
|
| 103 |
# Ensure we're working with string data
|
| 104 |
-
|
| 105 |
-
type_dist =
|
| 106 |
results["facility_type_distribution"] = type_dist
|
| 107 |
|
| 108 |
# Calculate diversity index
|
| 109 |
diversity = self._calculate_diversity_index(type_dist)
|
| 110 |
results["facility_diversity"] = diversity
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
|
| 114 |
-
if
|
| 115 |
# Ensure we're working with string data
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
results["
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
# City distribution
|
| 121 |
-
city_col = self._find_column(
|
| 122 |
if city_col:
|
| 123 |
# Ensure we're working with string data
|
| 124 |
-
|
| 125 |
-
city_counts =
|
| 126 |
top_cities = city_counts.index.tolist()
|
| 127 |
|
| 128 |
# Breakdown by facility type for top cities
|
| 129 |
city_breakdown = {}
|
| 130 |
for city in top_cities:
|
| 131 |
-
city_data =
|
| 132 |
if not city_data.empty and type_col in city_data.columns:
|
| 133 |
city_breakdown[city] = city_data[type_col].value_counts().to_dict()
|
| 134 |
|
|
@@ -136,54 +164,62 @@ class HealthcareAnalyzer:
|
|
| 136 |
results["city_breakdown"] = city_breakdown
|
| 137 |
|
| 138 |
# Total facilities count
|
| 139 |
-
results["total_facilities"] = len(
|
| 140 |
|
| 141 |
return results
|
| 142 |
|
| 143 |
-
def analyze_capacity(self, relevant_data: List[str]) -> Dict[str, Any]:
|
| 144 |
"""Enhanced capacity analysis"""
|
| 145 |
results = {}
|
|
|
|
|
|
|
| 146 |
|
| 147 |
for data_name in relevant_data:
|
| 148 |
df = self.data_registry.get(data_name)
|
| 149 |
if df is None or df.empty:
|
| 150 |
continue
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
# Current capacity
|
| 153 |
-
capacity_col = self._find_column(
|
| 154 |
if capacity_col:
|
| 155 |
# Ensure we're working with numeric data
|
| 156 |
-
|
| 157 |
-
total_capacity =
|
| 158 |
results["total_capacity"] = total_capacity
|
| 159 |
|
| 160 |
# Capacity by facility type
|
| 161 |
-
type_col = self._find_column(
|
| 162 |
-
if type_col and type_col in
|
| 163 |
-
capacity_by_type =
|
| 164 |
results["capacity_by_type"] = capacity_by_type
|
| 165 |
|
| 166 |
# Capacity utilization
|
| 167 |
-
utilization_col = self._find_column(
|
| 168 |
if utilization_col:
|
| 169 |
# Ensure we're working with numeric data
|
| 170 |
-
|
| 171 |
-
avg_utilization =
|
| 172 |
results["average_utilization"] = avg_utilization
|
| 173 |
|
| 174 |
# Utilization by facility type
|
| 175 |
-
if type_col and type_col in
|
| 176 |
-
utilization_by_type =
|
| 177 |
results["utilization_by_type"] = utilization_by_type
|
| 178 |
|
| 179 |
# Capacity trends
|
| 180 |
-
time_cols = [col for col in
|
| 181 |
if len(time_cols) >= 2:
|
| 182 |
trend_data = {}
|
| 183 |
for col in time_cols:
|
| 184 |
# Ensure we're working with numeric data
|
| 185 |
-
|
| 186 |
-
trend_data[col] =
|
| 187 |
results["capacity_trends"] = trend_data
|
| 188 |
|
| 189 |
# Calculate growth rate
|
|
@@ -195,30 +231,30 @@ class HealthcareAnalyzer:
|
|
| 195 |
results["capacity_growth_rate"] = growth_rate
|
| 196 |
|
| 197 |
# Bed change analysis
|
| 198 |
-
prev_col = self._find_column(
|
| 199 |
-
current_col = self._find_column(
|
| 200 |
|
| 201 |
if prev_col and current_col:
|
| 202 |
# Ensure we're working with numeric data
|
| 203 |
-
|
| 204 |
-
|
| 205 |
|
| 206 |
# Calculate bed change
|
| 207 |
-
|
| 208 |
|
| 209 |
# Calculate percentage change
|
| 210 |
-
|
| 211 |
lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
|
| 212 |
axis=1
|
| 213 |
)
|
| 214 |
|
| 215 |
-
# Zone-level analysis
|
| 216 |
-
zone_col = self._find_column(
|
| 217 |
if zone_col:
|
| 218 |
# Ensure we're working with string data
|
| 219 |
-
|
| 220 |
|
| 221 |
-
zone_summary =
|
| 222 |
current_col: 'sum',
|
| 223 |
prev_col: 'sum',
|
| 224 |
'bed_change': 'sum'
|
|
@@ -246,12 +282,72 @@ class HealthcareAnalyzer:
|
|
| 246 |
results["max_percentage_decrease"] = max_pct_decrease.to_dict()
|
| 247 |
|
| 248 |
# Identify facilities with largest declines
|
| 249 |
-
facilities_decline =
|
| 250 |
if not facilities_decline.empty:
|
| 251 |
results["facilities_with_largest_declines"] = facilities_decline.to_dict('records')
|
| 252 |
|
| 253 |
return results
|
| 254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
def analyze_resource_allocation(self, relevant_data: List[str]) -> Dict[str, Any]:
|
| 256 |
"""Analyze resource allocation patterns"""
|
| 257 |
results = {}
|
|
@@ -331,6 +427,7 @@ class HealthcareAnalyzer:
|
|
| 331 |
def generate_recommendations(self, analysis_results: Dict[str, Any], requirements: Dict[str, Any]) -> List[Dict[str, str]]:
|
| 332 |
"""Generate data-driven operational recommendations"""
|
| 333 |
recommendations = []
|
|
|
|
| 334 |
|
| 335 |
# Capacity-related recommendations
|
| 336 |
if "capacity_analysis" in analysis_results:
|
|
@@ -340,7 +437,7 @@ class HealthcareAnalyzer:
|
|
| 340 |
if "average_utilization" in capacity and capacity["average_utilization"] < 0.7:
|
| 341 |
recommendations.append({
|
| 342 |
"title": "Optimize Underutilized Capacity",
|
| 343 |
-
"description": f"Average utilization is {capacity['average_utilization']:.1%}. Consider repurposing underutilized facilities or consolidating services.",
|
| 344 |
"priority": "Medium",
|
| 345 |
"data_source": "Capacity utilization analysis"
|
| 346 |
})
|
|
@@ -349,10 +446,24 @@ class HealthcareAnalyzer:
|
|
| 349 |
if "capacity_growth_rate" in capacity and capacity["capacity_growth_rate"] < 2:
|
| 350 |
recommendations.append({
|
| 351 |
"title": "Expand Capacity Strategically",
|
| 352 |
-
"description": f"Capacity growth rate is only {capacity['capacity_growth_rate']:.1f}
|
| 353 |
"priority": "High",
|
| 354 |
"data_source": "Capacity trend analysis"
|
| 355 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
# Geographic distribution recommendations
|
| 358 |
if "facility_distribution" in analysis_results:
|
|
@@ -361,7 +472,7 @@ class HealthcareAnalyzer:
|
|
| 361 |
if "geographic_inequality" in dist and dist["geographic_inequality"] > 0.4:
|
| 362 |
recommendations.append({
|
| 363 |
"title": "Address Geographic Inequity",
|
| 364 |
-
"description": f"High geographic inequality (Gini: {dist['geographic_inequality']:.2f}). Consider targeted investments in underserved areas.",
|
| 365 |
"priority": "High",
|
| 366 |
"data_source": "Geographic distribution analysis"
|
| 367 |
})
|
|
@@ -373,7 +484,7 @@ class HealthcareAnalyzer:
|
|
| 373 |
if "staff_per_bed_ratio" in resources and resources["staff_per_bed_ratio"] < 1.5:
|
| 374 |
recommendations.append({
|
| 375 |
"title": "Increase Staffing Levels",
|
| 376 |
-
"description": f"Staff per bed ratio is {resources['staff_per_bed_ratio']:.2f}, which may be insufficient. Consider hiring additional staff.",
|
| 377 |
"priority": "High",
|
| 378 |
"data_source": "Resource allocation analysis"
|
| 379 |
})
|
|
@@ -476,11 +587,44 @@ class HealthcareAnalyzer:
|
|
| 476 |
|
| 477 |
def _extract_geographic_scope(self, text):
|
| 478 |
"""Extract geographic scope from text"""
|
| 479 |
-
#
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
return "Canada"
|
|
|
|
|
|
|
|
|
|
| 484 |
return "Unknown"
|
| 485 |
|
| 486 |
def _extract_time_period(self, text):
|
|
|
|
| 11 |
def __init__(self, data_registry):
    """Attach the analyzer to a data registry and reset its per-run state.

    Args:
        data_registry: Object the analysis methods query with
            ``.get(name)`` to obtain a dataset.
    """
    self.data_registry = data_registry
    # Starts empty.
    self.analysis_results = dict()
    # Scenario text is kept for context; comprehensive_analysis() overwrites
    # this placeholder with the text it is given.
    self.scenario_text = str()
|
| 15 |
|
| 16 |
def comprehensive_analysis(self, scenario_text: str) -> Dict[str, Any]:
|
| 17 |
"""Perform comprehensive healthcare scenario analysis"""
|
| 18 |
logger.info("Starting comprehensive healthcare analysis")
|
| 19 |
|
| 20 |
+
# Store scenario text for use in other methods
|
| 21 |
+
self.scenario_text = scenario_text
|
| 22 |
+
|
| 23 |
# Extract tasks and requirements
|
| 24 |
tasks = self._extract_tasks(scenario_text)
|
| 25 |
requirements = self._extract_requirements(scenario_text)
|
|
|
|
| 31 |
results = {}
|
| 32 |
|
| 33 |
if "facility_distribution" in tasks:
|
| 34 |
+
results["facility_distribution"] = self.analyze_facility_distribution(relevant_data, requirements)
|
| 35 |
|
| 36 |
if "capacity_analysis" in tasks:
|
| 37 |
+
results["capacity_analysis"] = self.analyze_capacity(relevant_data, requirements)
|
| 38 |
|
| 39 |
if "resource_allocation" in tasks:
|
| 40 |
results["resource_allocation"] = self.analyze_resource_allocation(relevant_data)
|
|
|
|
| 73 |
"geographic_scope": self._extract_geographic_scope(scenario_text),
|
| 74 |
"time_period": self._extract_time_period(scenario_text),
|
| 75 |
"facility_types": self._extract_facility_types(scenario_text),
|
| 76 |
+
"metrics_needed": self._extract_metrics(scenario_text),
|
| 77 |
+
"regions": self._extract_regions(scenario_text)
|
| 78 |
}
|
| 79 |
|
| 80 |
+
def _extract_regions(self, scenario_text: str) -> List[str]:
    """Return the region names mentioned in a scenario, without duplicates.

    Three families of names are recognised, in this order:

    1. A capitalised word followed by ``Zone``, ``Region``, ``Area`` or
       ``District`` (for example ``"Calgary Zone"``).
    2. Compass/central words and a fixed set of major Canadian cities.
    3. Canadian province and territory names (plus ``PEI`` and ``NWT``).

    Families 2 and 3 are matched as whole words only, so that words such as
    "Western", "Northwest" or "Centralized" do not yield a spurious "West",
    "North" or "Central".

    Args:
        scenario_text: Free-text scenario description. ``None``, a non-string
            value or an empty string yields an empty list.

    Returns:
        Matched names exactly as they appear in the text, grouped by pattern
        family (in the order above) and, within a family, in order of first
        appearance. Each distinct name is listed once.
    """
    import re

    if not scenario_text or not isinstance(scenario_text, str):
        return []

    # Non-capturing groups make re.findall return plain strings (the whole
    # match) rather than tuples of sub-groups.
    region_patterns = (
        r'[A-Z][a-z]+ (?:Zone|Region|Area|District)',
        r'\b(?:North|South|East|West|Central|Calgary|Edmonton|Toronto|Vancouver|Montreal)\b',
        r'\b(?:Alberta|British Columbia|Ontario|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|PEI|Newfoundland|Yukon|NWT|Nunavut)\b',
    )

    found = []
    for pattern in region_patterns:
        found.extend(re.findall(pattern, scenario_text))

    # dict preserves insertion order, which gives order-preserving de-duplication.
    return list(dict.fromkeys(found))
|
| 106 |
+
|
| 107 |
+
def analyze_facility_distribution(self, relevant_data: List[str], requirements: Dict[str, Any]) -> Dict[str, Any]:
|
| 108 |
"""Enhanced facility distribution analysis"""
|
| 109 |
results = {}
|
| 110 |
+
geographic_scope = requirements.get("geographic_scope", "Unknown")
|
| 111 |
+
regions = requirements.get("regions", [])
|
| 112 |
|
| 113 |
for data_name in relevant_data:
|
| 114 |
df = self.data_registry.get(data_name)
|
| 115 |
if df is None or df.empty:
|
| 116 |
continue
|
| 117 |
|
| 118 |
+
# Filter data based on geographic scope
|
| 119 |
+
filtered_df = self._filter_by_geography(df, geographic_scope, regions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
+
if filtered_df.empty:
|
| 122 |
+
continue
|
| 123 |
+
|
| 124 |
# Facility type distribution
|
| 125 |
+
type_col = self._find_column(filtered_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
|
| 126 |
if type_col:
|
| 127 |
# Ensure we're working with string data
|
| 128 |
+
filtered_df[type_col] = filtered_df[type_col].astype(str)
|
| 129 |
+
type_dist = filtered_df[type_col].value_counts().to_dict()
|
| 130 |
results["facility_type_distribution"] = type_dist
|
| 131 |
|
| 132 |
# Calculate diversity index
|
| 133 |
diversity = self._calculate_diversity_index(type_dist)
|
| 134 |
results["facility_diversity"] = diversity
|
| 135 |
|
| 136 |
+
# Geographic distribution
|
| 137 |
+
geo_col = self._find_column(filtered_df, ['province', 'state', 'region', 'zone', 'area'])
|
| 138 |
+
if geo_col:
|
| 139 |
# Ensure we're working with string data
|
| 140 |
+
filtered_df[geo_col] = filtered_df[geo_col].astype(str)
|
| 141 |
+
geo_dist = filtered_df[geo_col].value_counts().to_dict()
|
| 142 |
+
results["geographic_distribution"] = geo_dist
|
| 143 |
+
|
| 144 |
+
# Calculate Gini coefficient for inequality
|
| 145 |
+
gini = self._calculate_gini(list(geo_dist.values()))
|
| 146 |
+
results["geographic_inequality"] = gini
|
| 147 |
|
| 148 |
# City distribution
|
| 149 |
+
city_col = self._find_column(filtered_df, ['city', 'municipality', 'town'])
|
| 150 |
if city_col:
|
| 151 |
# Ensure we're working with string data
|
| 152 |
+
filtered_df[city_col] = filtered_df[city_col].astype(str)
|
| 153 |
+
city_counts = filtered_df[city_col].value_counts().head(5)
|
| 154 |
top_cities = city_counts.index.tolist()
|
| 155 |
|
| 156 |
# Breakdown by facility type for top cities
|
| 157 |
city_breakdown = {}
|
| 158 |
for city in top_cities:
|
| 159 |
+
city_data = filtered_df[filtered_df[city_col] == city]
|
| 160 |
if not city_data.empty and type_col in city_data.columns:
|
| 161 |
city_breakdown[city] = city_data[type_col].value_counts().to_dict()
|
| 162 |
|
|
|
|
| 164 |
results["city_breakdown"] = city_breakdown
|
| 165 |
|
| 166 |
# Total facilities count
|
| 167 |
+
results["total_facilities"] = len(filtered_df)
|
| 168 |
|
| 169 |
return results
|
| 170 |
|
| 171 |
+
def analyze_capacity(self, relevant_data: List[str], requirements: Dict[str, Any]) -> Dict[str, Any]:
|
| 172 |
"""Enhanced capacity analysis"""
|
| 173 |
results = {}
|
| 174 |
+
geographic_scope = requirements.get("geographic_scope", "Unknown")
|
| 175 |
+
regions = requirements.get("regions", [])
|
| 176 |
|
| 177 |
for data_name in relevant_data:
|
| 178 |
df = self.data_registry.get(data_name)
|
| 179 |
if df is None or df.empty:
|
| 180 |
continue
|
| 181 |
|
| 182 |
+
# Filter data based on geographic scope
|
| 183 |
+
filtered_df = self._filter_by_geography(df, geographic_scope, regions)
|
| 184 |
+
|
| 185 |
+
if filtered_df.empty:
|
| 186 |
+
continue
|
| 187 |
+
|
| 188 |
# Current capacity
|
| 189 |
+
capacity_col = self._find_column(filtered_df, ['capacity', 'beds', 'current_capacity', 'beds_current'])
|
| 190 |
if capacity_col:
|
| 191 |
# Ensure we're working with numeric data
|
| 192 |
+
filtered_df[capacity_col] = pd.to_numeric(filtered_df[capacity_col], errors='coerce')
|
| 193 |
+
total_capacity = filtered_df[capacity_col].sum()
|
| 194 |
results["total_capacity"] = total_capacity
|
| 195 |
|
| 196 |
# Capacity by facility type
|
| 197 |
+
type_col = self._find_column(filtered_df, ['type', 'facility_type'])
|
| 198 |
+
if type_col and type_col in filtered_df.columns:
|
| 199 |
+
capacity_by_type = filtered_df.groupby(type_col)[capacity_col].sum().to_dict()
|
| 200 |
results["capacity_by_type"] = capacity_by_type
|
| 201 |
|
| 202 |
# Capacity utilization
|
| 203 |
+
utilization_col = self._find_column(filtered_df, ['utilization', 'occupancy', 'occupancy_rate'])
|
| 204 |
if utilization_col:
|
| 205 |
# Ensure we're working with numeric data
|
| 206 |
+
filtered_df[utilization_col] = pd.to_numeric(filtered_df[utilization_col], errors='coerce')
|
| 207 |
+
avg_utilization = filtered_df[utilization_col].mean()
|
| 208 |
results["average_utilization"] = avg_utilization
|
| 209 |
|
| 210 |
# Utilization by facility type
|
| 211 |
+
if type_col and type_col in filtered_df.columns:
|
| 212 |
+
utilization_by_type = filtered_df.groupby(type_col)[utilization_col].mean().to_dict()
|
| 213 |
results["utilization_by_type"] = utilization_by_type
|
| 214 |
|
| 215 |
# Capacity trends
|
| 216 |
+
time_cols = [col for col in filtered_df.columns if any(year in col.lower() for year in ['2020', '2021', '2022', '2023', '2024'])]
|
| 217 |
if len(time_cols) >= 2:
|
| 218 |
trend_data = {}
|
| 219 |
for col in time_cols:
|
| 220 |
# Ensure we're working with numeric data
|
| 221 |
+
filtered_df[col] = pd.to_numeric(filtered_df[col], errors='coerce')
|
| 222 |
+
trend_data[col] = filtered_df[col].sum()
|
| 223 |
results["capacity_trends"] = trend_data
|
| 224 |
|
| 225 |
# Calculate growth rate
|
|
|
|
| 231 |
results["capacity_growth_rate"] = growth_rate
|
| 232 |
|
| 233 |
# Bed change analysis
|
| 234 |
+
prev_col = self._find_column(filtered_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
|
| 235 |
+
current_col = self._find_column(filtered_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
|
| 236 |
|
| 237 |
if prev_col and current_col:
|
| 238 |
# Ensure we're working with numeric data
|
| 239 |
+
filtered_df[prev_col] = pd.to_numeric(filtered_df[prev_col], errors='coerce')
|
| 240 |
+
filtered_df[current_col] = pd.to_numeric(filtered_df[current_col], errors='coerce')
|
| 241 |
|
| 242 |
# Calculate bed change
|
| 243 |
+
filtered_df['bed_change'] = filtered_df[current_col] - filtered_df[prev_col]
|
| 244 |
|
| 245 |
# Calculate percentage change
|
| 246 |
+
filtered_df['percent_change'] = filtered_df.apply(
|
| 247 |
lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
|
| 248 |
axis=1
|
| 249 |
)
|
| 250 |
|
| 251 |
+
# Zone/Region-level analysis
|
| 252 |
+
zone_col = self._find_column(filtered_df, ['zone', 'region', 'area', 'district'])
|
| 253 |
if zone_col:
|
| 254 |
# Ensure we're working with string data
|
| 255 |
+
filtered_df[zone_col] = filtered_df[zone_col].astype(str)
|
| 256 |
|
| 257 |
+
zone_summary = filtered_df.groupby(zone_col).agg({
|
| 258 |
current_col: 'sum',
|
| 259 |
prev_col: 'sum',
|
| 260 |
'bed_change': 'sum'
|
|
|
|
| 282 |
results["max_percentage_decrease"] = max_pct_decrease.to_dict()
|
| 283 |
|
| 284 |
# Identify facilities with largest declines
|
| 285 |
+
facilities_decline = filtered_df.sort_values('bed_change').head(5)
|
| 286 |
if not facilities_decline.empty:
|
| 287 |
results["facilities_with_largest_declines"] = facilities_decline.to_dict('records')
|
| 288 |
|
| 289 |
return results
|
| 290 |
|
| 291 |
+
def _filter_by_geography(self, df: pd.DataFrame, geographic_scope: str, regions: List[str]) -> pd.DataFrame:
    """Return a copy of ``df`` restricted to the requested geography.

    A row is kept when its geographic column matches the scope OR any of the
    regions. Matching ignores case and surrounding whitespace. Province and
    territory names also match their two-letter abbreviations.

    The input frame is never modified; the result is always a new frame.

    Args:
        df: Dataset to filter.
        geographic_scope: Scope name such as ``"Alberta"``. ``"Unknown"``,
            ``None`` or an empty string means no scope restriction.
        regions: Region names to include; ``None`` or empty means none.

    Returns:
        An unfiltered copy of ``df`` when there is nothing to filter on, when
        no geographic column is found, or when the scope is a country-level
        name that matches no row and no regions are given. Otherwise a copy
        of the matching rows with the geographic column cast to ``str``.
    """
    # Full name -> abbreviation. "pei" and "nwt" are included because
    # _extract_regions can emit those short forms.
    abbreviations = {
        "alberta": "ab",
        "british columbia": "bc",
        "ontario": "on",
        "quebec": "qc",
        "manitoba": "mb",
        "saskatchewan": "sk",
        "nova scotia": "ns",
        "new brunswick": "nb",
        "prince edward island": "pe",
        "pei": "pe",
        "newfoundland": "nl",
        "yukon": "yt",
        "northwest territories": "nt",
        "nwt": "nt",
        "nunavut": "nu",
    }
    # Scopes _extract_geographic_scope can return that are coarser than any
    # value expected in a province/state/zone column.
    country_scopes = {"canada", "united states"}

    scope_key = str(geographic_scope).strip().lower() if geographic_scope else "unknown"
    region_keys = [str(r).strip().lower() for r in (regions or []) if r]

    if scope_key == "unknown" and not region_keys:
        return df.copy()

    geo_col = self._find_column(df, ['province', 'state', 'region', 'zone', 'area', 'district'])
    if not geo_col:
        return df.copy()

    # Normalise into a separate Series. Writing the cast back into ``df``
    # would silently alter the shared frame held by the data registry.
    geo_values = df[geo_col].astype(str).str.strip().str.lower()

    def with_abbreviations(names):
        expanded = list(names)
        expanded.extend(abbreviations[n] for n in names if n in abbreviations)
        return expanded

    masks = []

    if scope_key != "unknown":
        scope_mask = geo_values.isin(with_abbreviations([scope_key]))
        # A country name that matches nothing must not empty the dataset:
        # the column simply holds finer-grained values. Any other scope is
        # applied even when it matches no rows.
        if scope_mask.any() or scope_key not in country_scopes:
            masks.append(scope_mask)

    if region_keys:
        masks.append(geo_values.isin(with_abbreviations(region_keys)))

    if not masks:
        return df.copy()

    combined = masks[0]
    for mask in masks[1:]:
        combined = combined | mask

    filtered = df[combined].copy()
    # The returned frame exposes the geographic column as strings; the cast
    # is applied to the copy only.
    filtered[geo_col] = filtered[geo_col].astype(str)
    return filtered
|
| 350 |
+
|
| 351 |
def analyze_resource_allocation(self, relevant_data: List[str]) -> Dict[str, Any]:
|
| 352 |
"""Analyze resource allocation patterns"""
|
| 353 |
results = {}
|
|
|
|
| 427 |
def generate_recommendations(self, analysis_results: Dict[str, Any], requirements: Dict[str, Any]) -> List[Dict[str, str]]:
|
| 428 |
"""Generate data-driven operational recommendations"""
|
| 429 |
recommendations = []
|
| 430 |
+
geographic_scope = requirements.get("geographic_scope", "the region")
|
| 431 |
|
| 432 |
# Capacity-related recommendations
|
| 433 |
if "capacity_analysis" in analysis_results:
|
|
|
|
| 437 |
if "average_utilization" in capacity and capacity["average_utilization"] < 0.7:
|
| 438 |
recommendations.append({
|
| 439 |
"title": "Optimize Underutilized Capacity",
|
| 440 |
+
"description": f"Average utilization is {capacity['average_utilization']:.1%} in {geographic_scope}. Consider repurposing underutilized facilities or consolidating services.",
|
| 441 |
"priority": "Medium",
|
| 442 |
"data_source": "Capacity utilization analysis"
|
| 443 |
})
|
|
|
|
| 446 |
if "capacity_growth_rate" in capacity and capacity["capacity_growth_rate"] < 2:
|
| 447 |
recommendations.append({
|
| 448 |
"title": "Expand Capacity Strategically",
|
| 449 |
+
"description": f"Capacity growth rate is only {capacity['capacity_growth_rate']:.1f}% in {geographic_scope}. Invest in new facilities or expand existing ones to meet demand.",
|
| 450 |
"priority": "High",
|
| 451 |
"data_source": "Capacity trend analysis"
|
| 452 |
})
|
| 453 |
+
|
| 454 |
+
# Zone-specific recommendations
|
| 455 |
+
if "max_percentage_decrease" in capacity and isinstance(capacity["max_percentage_decrease"], dict):
|
| 456 |
+
zone_col = capacity.get("columns_used", {}).get("zone")
|
| 457 |
+
zone = capacity["max_percentage_decrease"].get(zone_col, 'a zone') if zone_col else 'a zone'
|
| 458 |
+
decrease = capacity["max_percentage_decrease"].get("percent_change", 0)
|
| 459 |
+
|
| 460 |
+
if zone and decrease:
|
| 461 |
+
recommendations.append({
|
| 462 |
+
"title": f"Address Capacity Decline in {zone}",
|
| 463 |
+
"description": f"{zone} shows a {decrease:.1f}% decrease in bed capacity. Investigate causes and implement recovery strategies.",
|
| 464 |
+
"priority": "High",
|
| 465 |
+
"data_source": "Zone capacity analysis"
|
| 466 |
+
})
|
| 467 |
|
| 468 |
# Geographic distribution recommendations
|
| 469 |
if "facility_distribution" in analysis_results:
|
|
|
|
| 472 |
if "geographic_inequality" in dist and dist["geographic_inequality"] > 0.4:
|
| 473 |
recommendations.append({
|
| 474 |
"title": "Address Geographic Inequity",
|
| 475 |
+
"description": f"High geographic inequality (Gini: {dist['geographic_inequality']:.2f}) in {geographic_scope}. Consider targeted investments in underserved areas.",
|
| 476 |
"priority": "High",
|
| 477 |
"data_source": "Geographic distribution analysis"
|
| 478 |
})
|
|
|
|
| 484 |
if "staff_per_bed_ratio" in resources and resources["staff_per_bed_ratio"] < 1.5:
|
| 485 |
recommendations.append({
|
| 486 |
"title": "Increase Staffing Levels",
|
| 487 |
+
"description": f"Staff per bed ratio is {resources['staff_per_bed_ratio']:.2f} in {geographic_scope}, which may be insufficient. Consider hiring additional staff.",
|
| 488 |
"priority": "High",
|
| 489 |
"data_source": "Resource allocation analysis"
|
| 490 |
})
|
|
|
|
| 587 |
|
| 588 |
def _extract_geographic_scope(self, text):
    """Extract the geographic scope named in a piece of text.

    Candidates are checked in this priority: Canadian provinces and
    territories, then US states, then the countries Canada and the United
    States. Within the province and state lists, the name appearing earliest
    in the list wins when several are mentioned.

    Names are matched case-insensitively as whole words, so "thousand" or
    "usage" is not read as "USA" and "remained" is not read as "Maine". A
    longer name takes precedence over a shorter one it contains, so
    "West Virginia" is not reported as "Virginia".

    Args:
        text: Free-text scenario description; ``None`` or empty is allowed.

    Returns:
        The title-cased province or state name, ``"Canada"``,
        ``"United States"``, or ``"Unknown"`` when nothing is recognised.
    """
    import re

    if not text:
        return "Unknown"

    provinces = [
        "alberta", "british columbia", "ontario", "quebec", "manitoba",
        "saskatchewan", "nova scotia", "new brunswick", "prince edward island",
        "newfoundland", "yukon", "northwest territories", "nunavut"
    ]

    states = [
        "alabama", "alaska", "arizona", "arkansas", "california", "colorado",
        "connecticut", "delaware", "florida", "georgia", "hawaii", "idaho",
        "illinois", "indiana", "iowa", "kansas", "kentucky", "louisiana",
        "maine", "maryland", "massachusetts", "michigan", "minnesota",
        "mississippi", "missouri", "montana", "nebraska", "nevada",
        "new hampshire", "new jersey", "new mexico", "new york",
        "north carolina", "north dakota", "ohio", "oklahoma", "oregon",
        "pennsylvania", "rhode island", "south carolina", "south dakota",
        "tennessee", "texas", "utah", "vermont", "virginia", "washington",
        "west virginia", "wisconsin", "wyoming"
    ]

    text_lower = str(text).lower()

    def first_mentioned(names):
        # Longest alternatives first, so "west virginia" is consumed as a
        # unit before the engine can match the "virginia" inside it.
        alternation = "|".join(
            re.escape(name) for name in sorted(names, key=len, reverse=True)
        )
        mentioned = set(re.findall(rf"\b(?:{alternation})\b", text_lower))
        # Keep the list-order priority among the names actually present.
        return next((name for name in names if name in mentioned), None)

    for candidates in (provinces, states):
        hit = first_mentioned(candidates)
        if hit is not None:
            return hit.title()

    # Check for countries
    if re.search(r"\bcanada\b", text_lower):
        return "Canada"
    if re.search(r"\b(?:usa|united states)\b", text_lower):
        return "United States"

    return "Unknown"
|
| 629 |
|
| 630 |
def _extract_time_period(self, text):
|