Rajan Sharma committed on
Commit
c01c57e
·
verified ·
1 Parent(s): 9b7d75c

Update healthcare_analysis.py

Browse files
Files changed (1) hide show
  1. healthcare_analysis.py +211 -67
healthcare_analysis.py CHANGED
@@ -11,11 +11,15 @@ class HealthcareAnalyzer:
11
  def __init__(self, data_registry):
12
  self.data_registry = data_registry
13
  self.analysis_results = {}
 
14
 
15
  def comprehensive_analysis(self, scenario_text: str) -> Dict[str, Any]:
16
  """Perform comprehensive healthcare scenario analysis"""
17
  logger.info("Starting comprehensive healthcare analysis")
18
 
 
 
 
19
  # Extract tasks and requirements
20
  tasks = self._extract_tasks(scenario_text)
21
  requirements = self._extract_requirements(scenario_text)
@@ -27,10 +31,10 @@ class HealthcareAnalyzer:
27
  results = {}
28
 
29
  if "facility_distribution" in tasks:
30
- results["facility_distribution"] = self.analyze_facility_distribution(relevant_data)
31
 
32
  if "capacity_analysis" in tasks:
33
- results["capacity_analysis"] = self.analyze_capacity(relevant_data)
34
 
35
  if "resource_allocation" in tasks:
36
  results["resource_allocation"] = self.analyze_resource_allocation(relevant_data)
@@ -69,66 +73,90 @@ class HealthcareAnalyzer:
69
  "geographic_scope": self._extract_geographic_scope(scenario_text),
70
  "time_period": self._extract_time_period(scenario_text),
71
  "facility_types": self._extract_facility_types(scenario_text),
72
- "metrics_needed": self._extract_metrics(scenario_text)
 
73
  }
74
 
75
- def analyze_facility_distribution(self, relevant_data: List[str]) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  """Enhanced facility distribution analysis"""
77
  results = {}
 
 
78
 
79
  for data_name in relevant_data:
80
  df = self.data_registry.get(data_name)
81
  if df is None or df.empty:
82
  continue
83
 
84
- # Geographic distribution
85
- geo_col = self._find_column(df, ['province', 'state', 'region', 'zone'])
86
- if geo_col:
87
- # Ensure we're working with string data
88
- df[geo_col] = df[geo_col].astype(str)
89
- alberta_mask = df[geo_col].str.lower().isin(['alberta', 'ab'])
90
- ab_facilities = df[alberta_mask].copy()
91
-
92
- if not ab_facilities.empty:
93
- geo_dist = ab_facilities[geo_col].value_counts().to_dict()
94
- results["geographic_distribution"] = geo_dist
95
-
96
- # Calculate Gini coefficient for inequality
97
- gini = self._calculate_gini(list(geo_dist.values()))
98
- results["geographic_inequality"] = gini
99
 
 
 
 
100
  # Facility type distribution
101
- type_col = self._find_column(df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
102
  if type_col:
103
  # Ensure we're working with string data
104
- df[type_col] = df[type_col].astype(str)
105
- type_dist = df[type_col].value_counts().to_dict()
106
  results["facility_type_distribution"] = type_dist
107
 
108
  # Calculate diversity index
109
  diversity = self._calculate_diversity_index(type_dist)
110
  results["facility_diversity"] = diversity
111
 
112
- # Urban vs rural distribution
113
- urban_col = self._find_column(df, ['urban', 'rural', 'location_type'])
114
- if urban_col:
115
  # Ensure we're working with string data
116
- df[urban_col] = df[urban_col].astype(str)
117
- urban_rural = df[urban_col].value_counts().to_dict()
118
- results["urban_rural_distribution"] = urban_rural
 
 
 
 
119
 
120
  # City distribution
121
- city_col = self._find_column(df, ['city', 'municipality', 'town'])
122
  if city_col:
123
  # Ensure we're working with string data
124
- df[city_col] = df[city_col].astype(str)
125
- city_counts = df[city_col].value_counts().head(5)
126
  top_cities = city_counts.index.tolist()
127
 
128
  # Breakdown by facility type for top cities
129
  city_breakdown = {}
130
  for city in top_cities:
131
- city_data = df[df[city_col] == city]
132
  if not city_data.empty and type_col in city_data.columns:
133
  city_breakdown[city] = city_data[type_col].value_counts().to_dict()
134
 
@@ -136,54 +164,62 @@ class HealthcareAnalyzer:
136
  results["city_breakdown"] = city_breakdown
137
 
138
  # Total facilities count
139
- results["total_facilities"] = len(df)
140
 
141
  return results
142
 
143
- def analyze_capacity(self, relevant_data: List[str]) -> Dict[str, Any]:
144
  """Enhanced capacity analysis"""
145
  results = {}
 
 
146
 
147
  for data_name in relevant_data:
148
  df = self.data_registry.get(data_name)
149
  if df is None or df.empty:
150
  continue
151
 
 
 
 
 
 
 
152
  # Current capacity
153
- capacity_col = self._find_column(df, ['capacity', 'beds', 'current_capacity', 'beds_current'])
154
  if capacity_col:
155
  # Ensure we're working with numeric data
156
- df[capacity_col] = pd.to_numeric(df[capacity_col], errors='coerce')
157
- total_capacity = df[capacity_col].sum()
158
  results["total_capacity"] = total_capacity
159
 
160
  # Capacity by facility type
161
- type_col = self._find_column(df, ['type', 'facility_type'])
162
- if type_col and type_col in df.columns:
163
- capacity_by_type = df.groupby(type_col)[capacity_col].sum().to_dict()
164
  results["capacity_by_type"] = capacity_by_type
165
 
166
  # Capacity utilization
167
- utilization_col = self._find_column(df, ['utilization', 'occupancy', 'occupancy_rate'])
168
  if utilization_col:
169
  # Ensure we're working with numeric data
170
- df[utilization_col] = pd.to_numeric(df[utilization_col], errors='coerce')
171
- avg_utilization = df[utilization_col].mean()
172
  results["average_utilization"] = avg_utilization
173
 
174
  # Utilization by facility type
175
- if type_col and type_col in df.columns:
176
- utilization_by_type = df.groupby(type_col)[utilization_col].mean().to_dict()
177
  results["utilization_by_type"] = utilization_by_type
178
 
179
  # Capacity trends
180
- time_cols = [col for col in df.columns if any(year in col.lower() for year in ['2020', '2021', '2022', '2023', '2024'])]
181
  if len(time_cols) >= 2:
182
  trend_data = {}
183
  for col in time_cols:
184
  # Ensure we're working with numeric data
185
- df[col] = pd.to_numeric(df[col], errors='coerce')
186
- trend_data[col] = df[col].sum()
187
  results["capacity_trends"] = trend_data
188
 
189
  # Calculate growth rate
@@ -195,30 +231,30 @@ class HealthcareAnalyzer:
195
  results["capacity_growth_rate"] = growth_rate
196
 
197
  # Bed change analysis
198
- prev_col = self._find_column(df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
199
- current_col = self._find_column(df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
200
 
201
  if prev_col and current_col:
202
  # Ensure we're working with numeric data
203
- df[prev_col] = pd.to_numeric(df[prev_col], errors='coerce')
204
- df[current_col] = pd.to_numeric(df[current_col], errors='coerce')
205
 
206
  # Calculate bed change
207
- df['bed_change'] = df[current_col] - df[prev_col]
208
 
209
  # Calculate percentage change
210
- df['percent_change'] = df.apply(
211
  lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
212
  axis=1
213
  )
214
 
215
- # Zone-level analysis
216
- zone_col = self._find_column(df, ['zone', 'region', 'area', 'district'])
217
  if zone_col:
218
  # Ensure we're working with string data
219
- df[zone_col] = df[zone_col].astype(str)
220
 
221
- zone_summary = df.groupby(zone_col).agg({
222
  current_col: 'sum',
223
  prev_col: 'sum',
224
  'bed_change': 'sum'
@@ -246,12 +282,72 @@ class HealthcareAnalyzer:
246
  results["max_percentage_decrease"] = max_pct_decrease.to_dict()
247
 
248
  # Identify facilities with largest declines
249
- facilities_decline = df.sort_values('bed_change').head(5)
250
  if not facilities_decline.empty:
251
  results["facilities_with_largest_declines"] = facilities_decline.to_dict('records')
252
 
253
  return results
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def analyze_resource_allocation(self, relevant_data: List[str]) -> Dict[str, Any]:
256
  """Analyze resource allocation patterns"""
257
  results = {}
@@ -331,6 +427,7 @@ class HealthcareAnalyzer:
331
  def generate_recommendations(self, analysis_results: Dict[str, Any], requirements: Dict[str, Any]) -> List[Dict[str, str]]:
332
  """Generate data-driven operational recommendations"""
333
  recommendations = []
 
334
 
335
  # Capacity-related recommendations
336
  if "capacity_analysis" in analysis_results:
@@ -340,7 +437,7 @@ class HealthcareAnalyzer:
340
  if "average_utilization" in capacity and capacity["average_utilization"] < 0.7:
341
  recommendations.append({
342
  "title": "Optimize Underutilized Capacity",
343
- "description": f"Average utilization is {capacity['average_utilization']:.1%}. Consider repurposing underutilized facilities or consolidating services.",
344
  "priority": "Medium",
345
  "data_source": "Capacity utilization analysis"
346
  })
@@ -349,10 +446,24 @@ class HealthcareAnalyzer:
349
  if "capacity_growth_rate" in capacity and capacity["capacity_growth_rate"] < 2:
350
  recommendations.append({
351
  "title": "Expand Capacity Strategically",
352
- "description": f"Capacity growth rate is only {capacity['capacity_growth_rate']:.1f}%. Invest in new facilities or expand existing ones to meet demand.",
353
  "priority": "High",
354
  "data_source": "Capacity trend analysis"
355
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
  # Geographic distribution recommendations
358
  if "facility_distribution" in analysis_results:
@@ -361,7 +472,7 @@ class HealthcareAnalyzer:
361
  if "geographic_inequality" in dist and dist["geographic_inequality"] > 0.4:
362
  recommendations.append({
363
  "title": "Address Geographic Inequity",
364
- "description": f"High geographic inequality (Gini: {dist['geographic_inequality']:.2f}). Consider targeted investments in underserved areas.",
365
  "priority": "High",
366
  "data_source": "Geographic distribution analysis"
367
  })
@@ -373,7 +484,7 @@ class HealthcareAnalyzer:
373
  if "staff_per_bed_ratio" in resources and resources["staff_per_bed_ratio"] < 1.5:
374
  recommendations.append({
375
  "title": "Increase Staffing Levels",
376
- "description": f"Staff per bed ratio is {resources['staff_per_bed_ratio']:.2f}, which may be insufficient. Consider hiring additional staff.",
377
  "priority": "High",
378
  "data_source": "Resource allocation analysis"
379
  })
@@ -476,11 +587,44 @@ class HealthcareAnalyzer:
476
 
477
  def _extract_geographic_scope(self, text):
478
  """Extract geographic scope from text"""
479
- # Simple keyword-based extraction
480
- if "alberta" in text.lower():
481
- return "Alberta"
482
- elif "canada" in text.lower():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
  return "Canada"
 
 
 
484
  return "Unknown"
485
 
486
  def _extract_time_period(self, text):
 
11
  def __init__(self, data_registry):
12
  self.data_registry = data_registry
13
  self.analysis_results = {}
14
+ self.scenario_text = "" # Store scenario text for context
15
 
16
  def comprehensive_analysis(self, scenario_text: str) -> Dict[str, Any]:
17
  """Perform comprehensive healthcare scenario analysis"""
18
  logger.info("Starting comprehensive healthcare analysis")
19
 
20
+ # Store scenario text for use in other methods
21
+ self.scenario_text = scenario_text
22
+
23
  # Extract tasks and requirements
24
  tasks = self._extract_tasks(scenario_text)
25
  requirements = self._extract_requirements(scenario_text)
 
31
  results = {}
32
 
33
  if "facility_distribution" in tasks:
34
+ results["facility_distribution"] = self.analyze_facility_distribution(relevant_data, requirements)
35
 
36
  if "capacity_analysis" in tasks:
37
+ results["capacity_analysis"] = self.analyze_capacity(relevant_data, requirements)
38
 
39
  if "resource_allocation" in tasks:
40
  results["resource_allocation"] = self.analyze_resource_allocation(relevant_data)
 
73
  "geographic_scope": self._extract_geographic_scope(scenario_text),
74
  "time_period": self._extract_time_period(scenario_text),
75
  "facility_types": self._extract_facility_types(scenario_text),
76
+ "metrics_needed": self._extract_metrics(scenario_text),
77
+ "regions": self._extract_regions(scenario_text)
78
  }
79
 
80
+ def _extract_regions(self, scenario_text: str) -> List[str]:
81
+ """Extract specific regions mentioned in the scenario"""
82
+ # Look for region names in the scenario
83
+ regions = []
84
+
85
+ # Common region patterns - this could be expanded
86
+ region_patterns = [
87
+ r'([A-Z][a-z]+ (Zone|Region|Area|District))',
88
+ r'(North|South|East|West|Central|Calgary|Edmonton|Toronto|Vancouver|Montreal)',
89
+ r'(Alberta|British Columbia|Ontario|Quebec|Manitoba|Saskatchewan|Nova Scotia|New Brunswick|PEI|Newfoundland|Yukon|NWT|Nunavut)'
90
+ ]
91
+
92
+ import re
93
+ for pattern in region_patterns:
94
+ matches = re.findall(pattern, scenario_text)
95
+ for match in matches:
96
+ if isinstance(match, tuple):
97
+ regions.append(match[0])
98
+ else:
99
+ regions.append(match)
100
+
101
+ # Remove duplicates while preserving order
102
+ seen = set()
103
+ unique_regions = [r for r in regions if not (r in seen or seen.add(r))]
104
+
105
+ return unique_regions
106
+
107
+ def analyze_facility_distribution(self, relevant_data: List[str], requirements: Dict[str, Any]) -> Dict[str, Any]:
108
  """Enhanced facility distribution analysis"""
109
  results = {}
110
+ geographic_scope = requirements.get("geographic_scope", "Unknown")
111
+ regions = requirements.get("regions", [])
112
 
113
  for data_name in relevant_data:
114
  df = self.data_registry.get(data_name)
115
  if df is None or df.empty:
116
  continue
117
 
118
+ # Filter data based on geographic scope
119
+ filtered_df = self._filter_by_geography(df, geographic_scope, regions)
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ if filtered_df.empty:
122
+ continue
123
+
124
  # Facility type distribution
125
+ type_col = self._find_column(filtered_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
126
  if type_col:
127
  # Ensure we're working with string data
128
+ filtered_df[type_col] = filtered_df[type_col].astype(str)
129
+ type_dist = filtered_df[type_col].value_counts().to_dict()
130
  results["facility_type_distribution"] = type_dist
131
 
132
  # Calculate diversity index
133
  diversity = self._calculate_diversity_index(type_dist)
134
  results["facility_diversity"] = diversity
135
 
136
+ # Geographic distribution
137
+ geo_col = self._find_column(filtered_df, ['province', 'state', 'region', 'zone', 'area'])
138
+ if geo_col:
139
  # Ensure we're working with string data
140
+ filtered_df[geo_col] = filtered_df[geo_col].astype(str)
141
+ geo_dist = filtered_df[geo_col].value_counts().to_dict()
142
+ results["geographic_distribution"] = geo_dist
143
+
144
+ # Calculate Gini coefficient for inequality
145
+ gini = self._calculate_gini(list(geo_dist.values()))
146
+ results["geographic_inequality"] = gini
147
 
148
  # City distribution
149
+ city_col = self._find_column(filtered_df, ['city', 'municipality', 'town'])
150
  if city_col:
151
  # Ensure we're working with string data
152
+ filtered_df[city_col] = filtered_df[city_col].astype(str)
153
+ city_counts = filtered_df[city_col].value_counts().head(5)
154
  top_cities = city_counts.index.tolist()
155
 
156
  # Breakdown by facility type for top cities
157
  city_breakdown = {}
158
  for city in top_cities:
159
+ city_data = filtered_df[filtered_df[city_col] == city]
160
  if not city_data.empty and type_col in city_data.columns:
161
  city_breakdown[city] = city_data[type_col].value_counts().to_dict()
162
 
 
164
  results["city_breakdown"] = city_breakdown
165
 
166
  # Total facilities count
167
+ results["total_facilities"] = len(filtered_df)
168
 
169
  return results
170
 
171
+ def analyze_capacity(self, relevant_data: List[str], requirements: Dict[str, Any]) -> Dict[str, Any]:
172
  """Enhanced capacity analysis"""
173
  results = {}
174
+ geographic_scope = requirements.get("geographic_scope", "Unknown")
175
+ regions = requirements.get("regions", [])
176
 
177
  for data_name in relevant_data:
178
  df = self.data_registry.get(data_name)
179
  if df is None or df.empty:
180
  continue
181
 
182
+ # Filter data based on geographic scope
183
+ filtered_df = self._filter_by_geography(df, geographic_scope, regions)
184
+
185
+ if filtered_df.empty:
186
+ continue
187
+
188
  # Current capacity
189
+ capacity_col = self._find_column(filtered_df, ['capacity', 'beds', 'current_capacity', 'beds_current'])
190
  if capacity_col:
191
  # Ensure we're working with numeric data
192
+ filtered_df[capacity_col] = pd.to_numeric(filtered_df[capacity_col], errors='coerce')
193
+ total_capacity = filtered_df[capacity_col].sum()
194
  results["total_capacity"] = total_capacity
195
 
196
  # Capacity by facility type
197
+ type_col = self._find_column(filtered_df, ['type', 'facility_type'])
198
+ if type_col and type_col in filtered_df.columns:
199
+ capacity_by_type = filtered_df.groupby(type_col)[capacity_col].sum().to_dict()
200
  results["capacity_by_type"] = capacity_by_type
201
 
202
  # Capacity utilization
203
+ utilization_col = self._find_column(filtered_df, ['utilization', 'occupancy', 'occupancy_rate'])
204
  if utilization_col:
205
  # Ensure we're working with numeric data
206
+ filtered_df[utilization_col] = pd.to_numeric(filtered_df[utilization_col], errors='coerce')
207
+ avg_utilization = filtered_df[utilization_col].mean()
208
  results["average_utilization"] = avg_utilization
209
 
210
  # Utilization by facility type
211
+ if type_col and type_col in filtered_df.columns:
212
+ utilization_by_type = filtered_df.groupby(type_col)[utilization_col].mean().to_dict()
213
  results["utilization_by_type"] = utilization_by_type
214
 
215
  # Capacity trends
216
+ time_cols = [col for col in filtered_df.columns if any(year in col.lower() for year in ['2020', '2021', '2022', '2023', '2024'])]
217
  if len(time_cols) >= 2:
218
  trend_data = {}
219
  for col in time_cols:
220
  # Ensure we're working with numeric data
221
+ filtered_df[col] = pd.to_numeric(filtered_df[col], errors='coerce')
222
+ trend_data[col] = filtered_df[col].sum()
223
  results["capacity_trends"] = trend_data
224
 
225
  # Calculate growth rate
 
231
  results["capacity_growth_rate"] = growth_rate
232
 
233
  # Bed change analysis
234
+ prev_col = self._find_column(filtered_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
235
+ current_col = self._find_column(filtered_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
236
 
237
  if prev_col and current_col:
238
  # Ensure we're working with numeric data
239
+ filtered_df[prev_col] = pd.to_numeric(filtered_df[prev_col], errors='coerce')
240
+ filtered_df[current_col] = pd.to_numeric(filtered_df[current_col], errors='coerce')
241
 
242
  # Calculate bed change
243
+ filtered_df['bed_change'] = filtered_df[current_col] - filtered_df[prev_col]
244
 
245
  # Calculate percentage change
246
+ filtered_df['percent_change'] = filtered_df.apply(
247
  lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
248
  axis=1
249
  )
250
 
251
+ # Zone/Region-level analysis
252
+ zone_col = self._find_column(filtered_df, ['zone', 'region', 'area', 'district'])
253
  if zone_col:
254
  # Ensure we're working with string data
255
+ filtered_df[zone_col] = filtered_df[zone_col].astype(str)
256
 
257
+ zone_summary = filtered_df.groupby(zone_col).agg({
258
  current_col: 'sum',
259
  prev_col: 'sum',
260
  'bed_change': 'sum'
 
282
  results["max_percentage_decrease"] = max_pct_decrease.to_dict()
283
 
284
  # Identify facilities with largest declines
285
+ facilities_decline = filtered_df.sort_values('bed_change').head(5)
286
  if not facilities_decline.empty:
287
  results["facilities_with_largest_declines"] = facilities_decline.to_dict('records')
288
 
289
  return results
290
 
291
+ def _filter_by_geography(self, df: pd.DataFrame, geographic_scope: str, regions: List[str]) -> pd.DataFrame:
292
+ """Filter dataframe based on geographic scope and regions"""
293
+ if geographic_scope == "Unknown" and not regions:
294
+ return df.copy()
295
+
296
+ # Try to find a geographic column
297
+ geo_col = self._find_column(df, ['province', 'state', 'region', 'zone', 'area', 'district'])
298
+
299
+ if geo_col is None:
300
+ return df.copy()
301
+
302
+ # Ensure we're working with string data
303
+ df[geo_col] = df[geo_col].astype(str)
304
+
305
+ # Create filters
306
+ filters = []
307
+
308
+ # Add geographic scope filter
309
+ if geographic_scope != "Unknown":
310
+ # Create a list of possible values for the geographic scope
311
+ scope_values = [geographic_scope.lower()]
312
+
313
+ # Add common abbreviations
314
+ abbreviations = {
315
+ "alberta": "ab",
316
+ "british columbia": "bc",
317
+ "ontario": "on",
318
+ "quebec": "qc",
319
+ "manitoba": "mb",
320
+ "saskatchewan": "sk",
321
+ "nova scotia": "ns",
322
+ "new brunswick": "nb",
323
+ "prince edward island": "pe",
324
+ "newfoundland": "nl",
325
+ "yukon": "yt",
326
+ "northwest territories": "nt",
327
+ "nunavut": "nu"
328
+ }
329
+
330
+ if geographic_scope.lower() in abbreviations:
331
+ scope_values.append(abbreviations[geographic_scope.lower()])
332
+
333
+ scope_filter = df[geo_col].str.lower().isin(scope_values)
334
+ filters.append(scope_filter)
335
+
336
+ # Add region filters
337
+ if regions:
338
+ region_filter = df[geo_col].str.lower().isin([r.lower() for r in regions])
339
+ filters.append(region_filter)
340
+
341
+ # Apply filters
342
+ if filters:
343
+ combined_filter = filters[0]
344
+ for f in filters[1:]:
345
+ combined_filter = combined_filter | f
346
+
347
+ return df[combined_filter].copy()
348
+
349
+ return df.copy()
350
+
351
  def analyze_resource_allocation(self, relevant_data: List[str]) -> Dict[str, Any]:
352
  """Analyze resource allocation patterns"""
353
  results = {}
 
427
  def generate_recommendations(self, analysis_results: Dict[str, Any], requirements: Dict[str, Any]) -> List[Dict[str, str]]:
428
  """Generate data-driven operational recommendations"""
429
  recommendations = []
430
+ geographic_scope = requirements.get("geographic_scope", "the region")
431
 
432
  # Capacity-related recommendations
433
  if "capacity_analysis" in analysis_results:
 
437
  if "average_utilization" in capacity and capacity["average_utilization"] < 0.7:
438
  recommendations.append({
439
  "title": "Optimize Underutilized Capacity",
440
+ "description": f"Average utilization is {capacity['average_utilization']:.1%} in {geographic_scope}. Consider repurposing underutilized facilities or consolidating services.",
441
  "priority": "Medium",
442
  "data_source": "Capacity utilization analysis"
443
  })
 
446
  if "capacity_growth_rate" in capacity and capacity["capacity_growth_rate"] < 2:
447
  recommendations.append({
448
  "title": "Expand Capacity Strategically",
449
+ "description": f"Capacity growth rate is only {capacity['capacity_growth_rate']:.1f}% in {geographic_scope}. Invest in new facilities or expand existing ones to meet demand.",
450
  "priority": "High",
451
  "data_source": "Capacity trend analysis"
452
  })
453
+
454
+ # Zone-specific recommendations
455
+ if "max_percentage_decrease" in capacity and isinstance(capacity["max_percentage_decrease"], dict):
456
+ zone_col = capacity.get("columns_used", {}).get("zone")
457
+ zone = capacity["max_percentage_decrease"].get(zone_col, 'a zone') if zone_col else 'a zone'
458
+ decrease = capacity["max_percentage_decrease"].get("percent_change", 0)
459
+
460
+ if zone and decrease:
461
+ recommendations.append({
462
+ "title": f"Address Capacity Decline in {zone}",
463
+ "description": f"{zone} shows a {decrease:.1f}% decrease in bed capacity. Investigate causes and implement recovery strategies.",
464
+ "priority": "High",
465
+ "data_source": "Zone capacity analysis"
466
+ })
467
 
468
  # Geographic distribution recommendations
469
  if "facility_distribution" in analysis_results:
 
472
  if "geographic_inequality" in dist and dist["geographic_inequality"] > 0.4:
473
  recommendations.append({
474
  "title": "Address Geographic Inequity",
475
+ "description": f"High geographic inequality (Gini: {dist['geographic_inequality']:.2f}) in {geographic_scope}. Consider targeted investments in underserved areas.",
476
  "priority": "High",
477
  "data_source": "Geographic distribution analysis"
478
  })
 
484
  if "staff_per_bed_ratio" in resources and resources["staff_per_bed_ratio"] < 1.5:
485
  recommendations.append({
486
  "title": "Increase Staffing Levels",
487
+ "description": f"Staff per bed ratio is {resources['staff_per_bed_ratio']:.2f} in {geographic_scope}, which may be insufficient. Consider hiring additional staff.",
488
  "priority": "High",
489
  "data_source": "Resource allocation analysis"
490
  })
 
587
 
588
  def _extract_geographic_scope(self, text):
589
  """Extract geographic scope from text"""
590
+ # Look for province/state names
591
+ provinces = [
592
+ "alberta", "british columbia", "ontario", "quebec", "manitoba",
593
+ "saskatchewan", "nova scotia", "new brunswick", "prince edward island",
594
+ "newfoundland", "yukon", "northwest territories", "nunavut"
595
+ ]
596
+
597
+ states = [
598
+ "alabama", "alaska", "arizona", "arkansas", "california", "colorado",
599
+ "connecticut", "delaware", "florida", "georgia", "hawaii", "idaho",
600
+ "illinois", "indiana", "iowa", "kansas", "kentucky", "louisiana",
601
+ "maine", "maryland", "massachusetts", "michigan", "minnesota",
602
+ "mississippi", "missouri", "montana", "nebraska", "nevada",
603
+ "new hampshire", "new jersey", "new mexico", "new york",
604
+ "north carolina", "north dakota", "ohio", "oklahoma", "oregon",
605
+ "pennsylvania", "rhode island", "south carolina", "south dakota",
606
+ "tennessee", "texas", "utah", "vermont", "virginia", "washington",
607
+ "west virginia", "wisconsin", "wyoming"
608
+ ]
609
+
610
+ text_lower = text.lower()
611
+
612
+ # Check for provinces
613
+ for province in provinces:
614
+ if province in text_lower:
615
+ return province.title()
616
+
617
+ # Check for states
618
+ for state in states:
619
+ if state in text_lower:
620
+ return state.title()
621
+
622
+ # Check for countries
623
+ if "canada" in text_lower:
624
  return "Canada"
625
+ if "usa" in text_lower or "united states" in text_lower:
626
+ return "United States"
627
+
628
  return "Unknown"
629
 
630
  def _extract_time_period(self, text):