Rajan Sharma committed on
Commit
468bc14
·
verified ·
1 Parent(s): c1ff5e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -70
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py - Complete Dynamic Healthcare Scenario Analysis System
2
  import os, re, json, traceback, pathlib
3
  from functools import lru_cache
4
  from typing import List, Dict, Any, Tuple, Optional
@@ -73,7 +73,35 @@ Formatting rules for structured analysis:
73
  - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
74
  """.strip()
75
 
76
- # ---------- Session RAG Class (Simplified) ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  class SessionRAG:
78
  def __init__(self):
79
  self.docs = []
@@ -134,6 +162,22 @@ def is_healthcare_scenario(text: str, uploaded_files_paths) -> bool:
134
  return (has_healthcare_keywords or has_facility_types or has_healthcare_tasks) and \
135
  (has_healthcare_files or has_scenario_structure)
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  def process_healthcare_data(uploaded_files_paths, data_registry):
138
  """Process healthcare data files with robust error handling."""
139
  for file_path in uploaded_files_paths:
@@ -152,28 +196,25 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
152
  def analyze_facility_distribution(facilities_df):
153
  """Analyze healthcare facility distribution dynamically."""
154
  try:
155
- # Filter to Alberta if province column exists
156
- province_col = facilities_df.columns[facilities_df.columns.str.contains('province', case=False)]
157
- if len(province_col) > 0:
158
- province_col = province_col[0]
159
  alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
160
  ab_facilities = facilities_df[alberta_mask].copy()
161
  else:
162
  ab_facilities = facilities_df.copy()
163
 
164
  # Find facility type column
165
- type_col = facilities_df.columns[facilities_df.columns.str.contains('type', case=False)]
166
- if len(type_col) == 0:
167
  return {"error": "Facility type column not found"}
168
- type_col = type_col[0]
169
 
170
  # Facility type frequency
171
  type_counts = ab_facilities[type_col].value_counts().to_dict()
172
 
173
- # Top cities by facility count
174
- city_col = facilities_df.columns[facilities_df.columns.str.contains('city', case=False)]
175
- if len(city_col) > 0:
176
- city_col = city_col[0]
177
  city_counts = ab_facilities[city_col].value_counts().head(5)
178
  top_cities = city_counts.index.tolist()
179
 
@@ -193,8 +234,8 @@ def analyze_facility_distribution(facilities_df):
193
  "city_breakdown": city_breakdown,
194
  "columns_used": {
195
  "facility_type": type_col,
196
- "city": city_col[0] if len(city_col) > 0 else None,
197
- "province": province_col[0] if len(province_col) > 0 else None
198
  }
199
  }
200
  except Exception as e:
@@ -205,14 +246,11 @@ def analyze_bed_capacity(beds_df):
205
  """Analyze bed capacity dynamically."""
206
  try:
207
  # Find required columns
208
- current_cols = beds_df.columns[beds_df.columns.str.contains('current|2023|2024', case=False)]
209
- prev_cols = beds_df.columns[beds_df.columns.str.contains('prev|2022|previous', case=False)]
210
-
211
- if len(current_cols) == 0 or len(prev_cols) == 0:
212
- return {"error": f"Missing required columns. Found current: {current_cols.tolist()}, prev: {prev_cols.tolist()}"}
213
 
214
- current_col = current_cols[0]
215
- prev_col = prev_cols[0]
216
 
217
  # Ensure derived columns exist
218
  if 'bed_change' not in beds_df.columns:
@@ -225,18 +263,16 @@ def analyze_bed_capacity(beds_df):
225
  )
226
 
227
  # Filter to Alberta if province column exists
228
- province_col = beds_df.columns[beds_df.columns.str.contains('province', case=False)]
229
- if len(province_col) > 0:
230
- province_col = province_col[0]
231
  alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
232
  ab_beds = beds_df[alberta_mask].copy()
233
  else:
234
  ab_beds = beds_df.copy()
235
 
236
  # Calculate zone-level summaries if zone column exists
237
- zone_col = beds_df.columns[beds_df.columns.str.contains('zone|region|area', case=False)]
238
- if len(zone_col) > 0:
239
- zone_col = zone_col[0]
240
  zone_summary = ab_beds.groupby(zone_col).agg({
241
  current_col: 'sum',
242
  prev_col: 'sum',
@@ -275,8 +311,8 @@ def analyze_bed_capacity(beds_df):
275
  "columns_used": {
276
  "beds_current": current_col,
277
  "beds_prev": prev_col,
278
- "zone": zone_col[0] if len(zone_col) > 0 else None,
279
- "province": province_col[0] if len(province_col) > 0 else None
280
  }
281
  }
282
  except Exception as e:
@@ -287,32 +323,27 @@ def assess_long_term_capacity(facilities_df, beds_df, zone_name):
287
  """Assess long-term care capacity dynamically."""
288
  try:
289
  # Find relevant columns
290
- zone_col = facilities_df.columns[facilities_df.columns.str.contains('zone|region|area', case=False)]
291
- city_col = facilities_df.columns[facilities_df.columns.str.contains('city|municipality|town', case=False)]
292
- type_col = facilities_df.columns[facilities_df.columns.str.contains('type|category|class', case=False)]
293
 
294
- if len(type_col) == 0:
295
  return {"error": "Facility type column not found"}
296
 
297
- type_col = type_col[0]
298
-
299
  # Get facilities in the specified zone
300
- if len(zone_col) > 0:
301
- zone_col = zone_col[0]
302
  zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
303
  else:
304
  # If zone column not available, use province
305
- province_col = facilities_df.columns[facilities_df.columns.str.contains('province', case=False)]
306
- if len(province_col) > 0:
307
- province_col = province_col[0]
308
  alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
309
  zone_facilities = facilities_df[alberta_mask].copy()
310
  else:
311
  zone_facilities = facilities_df.copy()
312
 
313
  # Find major city in zone
314
- if len(city_col) > 0:
315
- city_col = city_col[0]
316
  city_counts = zone_facilities[city_col].value_counts()
317
  if len(city_counts) > 0:
318
  major_city = city_counts.index[0]
@@ -355,7 +386,8 @@ def generate_operational_recommendations(analysis_results):
355
  if 'bed_capacity' in analysis_results:
356
  bed_data = analysis_results['bed_capacity']
357
  if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
358
- zone = bed_data['max_percentage_decrease'].get('zone', '')
 
359
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
360
  if zone and decrease:
361
  recommendations.append({
@@ -399,6 +431,9 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
399
  """Format the healthcare analysis response with tables and sections."""
400
  response = "# Structured Analysis: Healthcare Scenario\n\n"
401
 
 
 
 
402
  # Data Preparation Section
403
  if 'facility_distribution' in results:
404
  fd = results['facility_distribution']
@@ -442,10 +477,14 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
442
  response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
443
  response += "|------|---------------|-----------------|-----------------|----------------|\n"
444
 
 
 
 
 
445
  for zone_data in bc['zone_summary']:
446
- zone = zone_data.get(bc['columns_used']['zone'], 'N/A') if bc['columns_used'].get('zone') else 'N/A'
447
- current = zone_data.get(bc['columns_used']['beds_current'], 'N/A')
448
- prev = zone_data.get(bc['columns_used']['beds_prev'], 'N/A')
449
  change = zone_data.get('bed_change', 'N/A')
450
  pct = zone_data.get('percent_change', 'N/A')
451
  response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
@@ -453,19 +492,21 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
453
 
454
  if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
455
  'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
 
456
  abs_dec = bc['max_absolute_decrease']
457
  pct_dec = bc['max_percentage_decrease']
458
- response += f"**Zone with largest absolute decrease**: {abs_dec.get(bc['columns_used']['zone'], 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
459
- response += f"**Zone with largest percentage decrease**: {pct_dec.get(bc['columns_used']['zone'], 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
460
 
461
  if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
462
  response += "### Facilities with Largest Bed Declines\n\n"
463
  response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
464
  response += "|----------|------|----------------|-----------|\n"
465
 
 
466
  for facility in bc['facilities_with_largest_declines']:
467
  name = facility.get('facility_name', 'N/A')
468
- zone = facility.get(bc['columns_used']['zone'], 'N/A') if bc['columns_used'].get('zone') else 'N/A'
469
  teaching = facility.get('teaching_status', 'N/A')
470
  change = facility.get('bed_change', 'N/A')
471
  response += f"| {name} | {zone} | {teaching} | {change} |\n"
@@ -494,7 +535,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
494
  response += f"- {ftype}: {count}\n"
495
  response += "\n"
496
 
497
- # Recommendations Section
498
  response += "## 4. Operational Recommendations\n\n"
499
  if recommendations:
500
  for rec in recommendations:
@@ -524,10 +565,14 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
524
  return response
525
 
526
  def handle_healthcare_scenario(scenario_text, data_registry, history):
527
- """Handle healthcare scenarios dynamically."""
528
  try:
529
  results = {}
530
 
 
 
 
 
531
  # Dynamically identify relevant files
532
  facility_files = data_registry.get_data_by_type('facility_data')
533
  bed_files = data_registry.get_data_by_type('bed_data')
@@ -549,14 +594,15 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
549
  "bed_files": bed_files
550
  })
551
 
552
- # Perform analyses based on available data
553
  if facilities_df is not None:
554
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
555
 
 
556
  if beds_df is not None:
557
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
558
 
559
- # Long-term care assessment if we have both data types
560
  if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
561
  zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
562
  if zone_col:
@@ -568,13 +614,13 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
568
  worst_zone
569
  )
570
 
571
- # Generate recommendations
572
  recommendations = generate_operational_recommendations(results)
573
 
574
- # Generate AI integration discussion
575
  ai_integration = generate_ai_integration_discussion(results)
576
 
577
- # Format response
578
  response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
579
 
580
  return response
@@ -704,14 +750,14 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
704
  return history + [(user_msg, ans)], awaiting_answers
705
 
706
  if is_identity_query(safe_in, history):
707
- ans = "I am an AI analytical system designed to help you analyze healthcare scenarios and make data-driven decisions."
708
  return history + [(user_msg, ans)], awaiting_answers
709
 
710
  # Initialize data registry and session RAG
711
  data_registry = DataRegistry()
712
  session_rag = SessionRAG()
713
 
714
- # Process uploaded files
715
  if uploaded_files_paths:
716
  process_healthcare_data(uploaded_files_paths, data_registry)
717
 
@@ -728,16 +774,55 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
728
  df = data_registry.get(file_name)
729
  session_rag.csv_columns = list(df.columns)
730
 
731
- # Check if this is a healthcare scenario
732
  if is_healthcare_scenario(safe_in, uploaded_files_paths):
733
- # Handle healthcare scenario directly
734
  response = handle_healthcare_scenario(safe_in, data_registry, history)
735
  return history + [(user_msg, response)], False
736
-
737
- # For non-healthcare scenarios, use the original logic
738
- # For now, provide a fallback response
739
- response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
740
- return history + [(user_msg, response)], awaiting_answers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
  except Exception as e:
743
  err = f"Error: {e}"
@@ -778,16 +863,16 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
778
  # --- HERO (initial screen) ---
779
  with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap:
780
  with gr.Column(elem_id="hero"):
781
- gr.HTML("<h2>What healthcare scenario can I help you analyze?</h2>")
782
  with gr.Row(elem_classes="search-row"):
783
  hero_msg = gr.Textbox(
784
- placeholder="Describe your healthcare scenario or upload data files for analysis…",
785
  show_label=False,
786
  lines=1,
787
  elem_classes="hero-box"
788
  )
789
  hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
790
- gr.Markdown('<div class="hint">Upload healthcare data files (CSV, Excel, JSON, PDF, etc.) and describe your scenario for comprehensive analysis.</div>')
791
 
792
  # --- MAIN APP (hidden until first message) ---
793
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
@@ -802,7 +887,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
802
  msg = gr.Textbox(
803
  label="",
804
  show_label=False,
805
- placeholder="Continue the conversation. Provide additional details or answer clarifying questions.",
806
  scale=10,
807
  elem_id="chat-msg",
808
  lines=1,
 
1
+ # app.py - Complete Dual-Mode Healthcare Analysis System
2
  import os, re, json, traceback, pathlib
3
  from functools import lru_cache
4
  from typing import List, Dict, Any, Tuple, Optional
 
73
  - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
74
  """.strip()
75
 
76
+ # ---------- Helper Functions ----------
77
def find_column(df, patterns):
    """Return the first column label of ``df`` that contains any of ``patterns``.

    Matching is a case-insensitive substring test. Columns are scanned in
    their DataFrame order, so the earliest matching column wins regardless
    of the order in which ``patterns`` are listed.

    Args:
        df: Object exposing ``empty`` and ``columns`` (a pandas DataFrame in
            the callers visible in this file); may be None.
        patterns: Iterable of substrings to look for in the column labels.

    Returns:
        The original (un-lowered) label of the first matching column, or
        None when ``df`` is None, ``df.empty`` is true, or nothing matches.
    """
    if df is None or df.empty:
        return None
    # Lower-case the patterns once instead of once per column.
    needles = [str(pattern).lower() for pattern in patterns]
    for col in df.columns:
        # Column labels are not guaranteed to be strings (e.g. integer labels
        # from a header-less file), so coerce before lower-casing instead of
        # raising AttributeError on ``col.lower()``.
        label = str(col).lower()
        if any(needle in label for needle in needles):
            return col
    return None
85
+
86
def extract_scenario_tasks(scenario_text):
    """Extract the list items that follow a "Tasks" heading in a scenario.

    The text is scanned line by line. A line starting with "tasks"
    (case-insensitive) opens the task section; a line starting with
    "operational recommendations" or "future integration" closes it.
    Inside the section, lines that start with a number followed by a dot
    ("1.", "6.", "12.", ...) or with a bullet character (•, -, *) are
    collected, stripped of surrounding whitespace.

    Args:
        scenario_text: Free-form scenario description; None or an empty
            string yields an empty result.

    Returns:
        List of task lines in their order of appearance.
    """
    tasks = []
    if not scenario_text:
        return tasks

    section_end_markers = ('operational recommendations', 'future integration')
    bullet_prefixes = ('•', '-', '*')
    in_tasks = False

    for raw_line in scenario_text.split('\n'):
        line = raw_line.strip()
        lowered = line.lower()

        if lowered.startswith('tasks'):
            in_tasks = True
            continue
        if not in_tasks:
            continue
        if lowered.startswith(section_end_markers):
            in_tasks = False
            continue

        # Accept any "<digits>." prefix rather than only 1.-5., so scenarios
        # with more than five numbered tasks are not silently truncated.
        number, dot, _ = line.partition('.')
        is_numbered = bool(dot) and number.isdigit()
        if is_numbered or line.startswith(bullet_prefixes):
            tasks.append(line)

    return tasks
103
+
104
+ # ---------- Session RAG Class ----------
105
  class SessionRAG:
106
  def __init__(self):
107
  self.docs = []
 
162
  return (has_healthcare_keywords or has_facility_types or has_healthcare_tasks) and \
163
  (has_healthcare_files or has_scenario_structure)
164
 
165
def is_general_conversation(text: str, uploaded_files_paths) -> bool:
    """Tell whether a message is general chat rather than a scenario request.

    A message accompanied by uploaded files is never treated as general
    conversation. Otherwise it is general conversation only when none of
    the scenario keywords occurs in it (case-insensitive substring match).
    """
    # Uploaded files signal that the user wants an analysis.
    if uploaded_files_paths:
        return False

    lowered = text.lower()
    scenario_keywords = (
        "scenario", "analyze", "analysis", "assess", "evaluate", "recommend",
        "tasks", "background", "situation", "dataset", "data",
    )
    for keyword in scenario_keywords:
        if keyword in lowered:
            # A single scenario keyword is enough to rule out small talk.
            return False
    return True
180
+
181
  def process_healthcare_data(uploaded_files_paths, data_registry):
182
  """Process healthcare data files with robust error handling."""
183
  for file_path in uploaded_files_paths:
 
196
  def analyze_facility_distribution(facilities_df):
197
  """Analyze healthcare facility distribution dynamically."""
198
  try:
199
+ # Find province column
200
+ province_col = find_column(facilities_df, ['province', 'state', 'territory'])
201
+ if province_col:
 
202
  alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
203
  ab_facilities = facilities_df[alberta_mask].copy()
204
  else:
205
  ab_facilities = facilities_df.copy()
206
 
207
  # Find facility type column
208
+ type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
209
+ if not type_col:
210
  return {"error": "Facility type column not found"}
 
211
 
212
  # Facility type frequency
213
  type_counts = ab_facilities[type_col].value_counts().to_dict()
214
 
215
+ # Find city column
216
+ city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
217
+ if city_col:
 
218
  city_counts = ab_facilities[city_col].value_counts().head(5)
219
  top_cities = city_counts.index.tolist()
220
 
 
234
  "city_breakdown": city_breakdown,
235
  "columns_used": {
236
  "facility_type": type_col,
237
+ "city": city_col,
238
+ "province": province_col
239
  }
240
  }
241
  except Exception as e:
 
246
  """Analyze bed capacity dynamically."""
247
  try:
248
  # Find required columns
249
+ current_col = find_column(beds_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
250
+ prev_col = find_column(beds_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
 
 
 
251
 
252
+ if not current_col or not prev_col:
253
+ return {"error": f"Missing required columns. Found current: {current_col}, prev: {prev_col}"}
254
 
255
  # Ensure derived columns exist
256
  if 'bed_change' not in beds_df.columns:
 
263
  )
264
 
265
  # Filter to Alberta if province column exists
266
+ province_col = find_column(beds_df, ['province', 'state', 'territory'])
267
+ if province_col:
 
268
  alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
269
  ab_beds = beds_df[alberta_mask].copy()
270
  else:
271
  ab_beds = beds_df.copy()
272
 
273
  # Calculate zone-level summaries if zone column exists
274
+ zone_col = find_column(beds_df, ['zone', 'region', 'area', 'district'])
275
+ if zone_col:
 
276
  zone_summary = ab_beds.groupby(zone_col).agg({
277
  current_col: 'sum',
278
  prev_col: 'sum',
 
311
  "columns_used": {
312
  "beds_current": current_col,
313
  "beds_prev": prev_col,
314
+ "zone": zone_col,
315
+ "province": province_col
316
  }
317
  }
318
  except Exception as e:
 
323
  """Assess long-term care capacity dynamically."""
324
  try:
325
  # Find relevant columns
326
+ zone_col = find_column(facilities_df, ['zone', 'region', 'area', 'district'])
327
+ city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
328
+ type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
329
 
330
+ if not type_col:
331
  return {"error": "Facility type column not found"}
332
 
 
 
333
  # Get facilities in the specified zone
334
+ if zone_col:
 
335
  zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
336
  else:
337
  # If zone column not available, use province
338
+ province_col = find_column(facilities_df, ['province', 'state', 'territory'])
339
+ if province_col:
 
340
  alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
341
  zone_facilities = facilities_df[alberta_mask].copy()
342
  else:
343
  zone_facilities = facilities_df.copy()
344
 
345
  # Find major city in zone
346
+ if city_col:
 
347
  city_counts = zone_facilities[city_col].value_counts()
348
  if len(city_counts) > 0:
349
  major_city = city_counts.index[0]
 
386
  if 'bed_capacity' in analysis_results:
387
  bed_data = analysis_results['bed_capacity']
388
  if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
389
+ zone_col = bed_data.get('columns_used', {}).get('zone')
390
+ zone = bed_data['max_percentage_decrease'].get(zone_col, '') if zone_col else ''
391
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
392
  if zone and decrease:
393
  recommendations.append({
 
431
  """Format the healthcare analysis response with tables and sections."""
432
  response = "# Structured Analysis: Healthcare Scenario\n\n"
433
 
434
+ # Extract tasks from scenario to ensure we address all requirements
435
+ tasks = extract_scenario_tasks(scenario_text)
436
+
437
  # Data Preparation Section
438
  if 'facility_distribution' in results:
439
  fd = results['facility_distribution']
 
477
  response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
478
  response += "|------|---------------|-----------------|-----------------|----------------|\n"
479
 
480
+ zone_col = bc.get('columns_used', {}).get('zone')
481
+ current_col = bc.get('columns_used', {}).get('beds_current')
482
+ prev_col = bc.get('columns_used', {}).get('beds_prev')
483
+
484
  for zone_data in bc['zone_summary']:
485
+ zone = zone_data.get(zone_col, 'N/A') if zone_col else 'N/A'
486
+ current = zone_data.get(current_col, 'N/A') if current_col else 'N/A'
487
+ prev = zone_data.get(prev_col, 'N/A') if prev_col else 'N/A'
488
  change = zone_data.get('bed_change', 'N/A')
489
  pct = zone_data.get('percent_change', 'N/A')
490
  response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
 
492
 
493
  if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
494
  'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
495
+ zone_col = bc.get('columns_used', {}).get('zone')
496
  abs_dec = bc['max_absolute_decrease']
497
  pct_dec = bc['max_percentage_decrease']
498
+ response += f"**Zone with largest absolute decrease**: {abs_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
499
+ response += f"**Zone with largest percentage decrease**: {pct_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
500
 
501
  if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
502
  response += "### Facilities with Largest Bed Declines\n\n"
503
  response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
504
  response += "|----------|------|----------------|-----------|\n"
505
 
506
+ zone_col = bc.get('columns_used', {}).get('zone')
507
  for facility in bc['facilities_with_largest_declines']:
508
  name = facility.get('facility_name', 'N/A')
509
+ zone = facility.get(zone_col, 'N/A') if zone_col else 'N/A'
510
  teaching = facility.get('teaching_status', 'N/A')
511
  change = facility.get('bed_change', 'N/A')
512
  response += f"| {name} | {zone} | {teaching} | {change} |\n"
 
535
  response += f"- {ftype}: {count}\n"
536
  response += "\n"
537
 
538
+ # Operational Recommendations Section
539
  response += "## 4. Operational Recommendations\n\n"
540
  if recommendations:
541
  for rec in recommendations:
 
565
  return response
566
 
567
  def handle_healthcare_scenario(scenario_text, data_registry, history):
568
+ """Handle healthcare scenarios dynamically with explicit task following."""
569
  try:
570
  results = {}
571
 
572
+ # Extract tasks from scenario to ensure we address all requirements
573
+ tasks = extract_scenario_tasks(scenario_text)
574
+ print(f"Extracted tasks: {tasks}")
575
+
576
  # Dynamically identify relevant files
577
  facility_files = data_registry.get_data_by_type('facility_data')
578
  bed_files = data_registry.get_data_by_type('bed_data')
 
594
  "bed_files": bed_files
595
  })
596
 
597
+ # Task 1: Data preparation (facility distribution)
598
  if facilities_df is not None:
599
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
600
 
601
+ # Task 2: Bed capacity analysis
602
  if beds_df is not None:
603
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
604
 
605
+ # Task 3: Long-term care capacity assessment
606
  if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
607
  zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
608
  if zone_col:
 
614
  worst_zone
615
  )
616
 
617
+ # Generate operational recommendations (Task 4.1)
618
  recommendations = generate_operational_recommendations(results)
619
 
620
+ # Generate AI integration discussion (Task 4.2)
621
  ai_integration = generate_ai_integration_discussion(results)
622
 
623
+ # Format response ensuring all tasks are addressed
624
  response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
625
 
626
  return response
 
750
  return history + [(user_msg, ans)], awaiting_answers
751
 
752
  if is_identity_query(safe_in, history):
753
+ ans = "I am an AI analytical system designed to help with both general conversations and healthcare scenario analysis. I can answer your questions and also analyze healthcare data when you upload files and describe a scenario."
754
  return history + [(user_msg, ans)], awaiting_answers
755
 
756
  # Initialize data registry and session RAG
757
  data_registry = DataRegistry()
758
  session_rag = SessionRAG()
759
 
760
+ # Process uploaded files if any
761
  if uploaded_files_paths:
762
  process_healthcare_data(uploaded_files_paths, data_registry)
763
 
 
774
  df = data_registry.get(file_name)
775
  session_rag.csv_columns = list(df.columns)
776
 
777
+ # Determine the mode: healthcare scenario or general conversation
778
  if is_healthcare_scenario(safe_in, uploaded_files_paths):
779
+ # Healthcare scenario mode
780
  response = handle_healthcare_scenario(safe_in, data_registry, history)
781
  return history + [(user_msg, response)], False
782
+ else:
783
+ # General conversation mode
784
+ # Try Cohere first if available
785
+ if USE_HOSTED_COHERE:
786
+ out = cohere_chat(safe_in, history)
787
+ if out:
788
+ out = _sanitize_text(out)
789
+ safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
790
+ if blocked_out:
791
+ safe_out = refusal_reply(reason_out)
792
+ log_event("assistant_reply", None, {
793
+ **hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
794
+ **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
795
+ "mode": "general_cohere",
796
+ })
797
+ return history + [(user_msg, safe_out)], False
798
+
799
+ # Fall back to local model
800
+ try:
801
+ model, tokenizer = load_local_model()
802
+ inputs = build_inputs(tokenizer, safe_in, history)
803
+ out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
804
+
805
+ if isinstance(out, str):
806
+ for tag in ("Assistant:", "System:", "User:"):
807
+ if out.startswith(tag):
808
+ out = out[len(tag):].strip()
809
+
810
+ out = _sanitize_text(out or "")
811
+ safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
812
+ if blocked_out:
813
+ safe_out = refusal_reply(reason_out)
814
+
815
+ log_event("assistant_reply", None, {
816
+ **hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
817
+ **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
818
+ "mode": "general_local",
819
+ })
820
+
821
+ return history + [(user_msg, safe_out)], False
822
+ except Exception as e:
823
+ err = f"Error generating response: {str(e)}"
824
+ log_event("model_error", None, {"error": str(e)})
825
+ return history + [(user_msg, err)], False
826
 
827
  except Exception as e:
828
  err = f"Error: {e}"
 
863
  # --- HERO (initial screen) ---
864
  with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap:
865
  with gr.Column(elem_id="hero"):
866
+ gr.HTML("<h2>How can I help you today?</h2>")
867
  with gr.Row(elem_classes="search-row"):
868
  hero_msg = gr.Textbox(
869
+ placeholder="Ask me anything or upload healthcare data files for scenario analysis…",
870
  show_label=False,
871
  lines=1,
872
  elem_classes="hero-box"
873
  )
874
  hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
875
+ gr.Markdown('<div class="hint">I can help with general questions or analyze healthcare scenarios when you upload data files and describe your analysis needs.</div>')
876
 
877
  # --- MAIN APP (hidden until first message) ---
878
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
 
887
  msg = gr.Textbox(
888
  label="",
889
  show_label=False,
890
+ placeholder="Ask me anything or continue your healthcare scenario analysis…",
891
  scale=10,
892
  elem_id="chat-msg",
893
  lines=1,