Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py - Complete
|
| 2 |
import os, re, json, traceback, pathlib
|
| 3 |
from functools import lru_cache
|
| 4 |
from typing import List, Dict, Any, Tuple, Optional
|
|
@@ -73,7 +73,35 @@ Formatting rules for structured analysis:
|
|
| 73 |
- End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
|
| 74 |
""".strip()
|
| 75 |
|
| 76 |
-
# ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
class SessionRAG:
|
| 78 |
def __init__(self):
|
| 79 |
self.docs = []
|
|
@@ -134,6 +162,22 @@ def is_healthcare_scenario(text: str, uploaded_files_paths) -> bool:
|
|
| 134 |
return (has_healthcare_keywords or has_facility_types or has_healthcare_tasks) and \
|
| 135 |
(has_healthcare_files or has_scenario_structure)
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
def process_healthcare_data(uploaded_files_paths, data_registry):
|
| 138 |
"""Process healthcare data files with robust error handling."""
|
| 139 |
for file_path in uploaded_files_paths:
|
|
@@ -152,28 +196,25 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
|
|
| 152 |
def analyze_facility_distribution(facilities_df):
|
| 153 |
"""Analyze healthcare facility distribution dynamically."""
|
| 154 |
try:
|
| 155 |
-
#
|
| 156 |
-
province_col = facilities_df
|
| 157 |
-
if
|
| 158 |
-
province_col = province_col[0]
|
| 159 |
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 160 |
ab_facilities = facilities_df[alberta_mask].copy()
|
| 161 |
else:
|
| 162 |
ab_facilities = facilities_df.copy()
|
| 163 |
|
| 164 |
# Find facility type column
|
| 165 |
-
type_col = facilities_df
|
| 166 |
-
if
|
| 167 |
return {"error": "Facility type column not found"}
|
| 168 |
-
type_col = type_col[0]
|
| 169 |
|
| 170 |
# Facility type frequency
|
| 171 |
type_counts = ab_facilities[type_col].value_counts().to_dict()
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
city_col = facilities_df
|
| 175 |
-
if
|
| 176 |
-
city_col = city_col[0]
|
| 177 |
city_counts = ab_facilities[city_col].value_counts().head(5)
|
| 178 |
top_cities = city_counts.index.tolist()
|
| 179 |
|
|
@@ -193,8 +234,8 @@ def analyze_facility_distribution(facilities_df):
|
|
| 193 |
"city_breakdown": city_breakdown,
|
| 194 |
"columns_used": {
|
| 195 |
"facility_type": type_col,
|
| 196 |
-
"city": city_col
|
| 197 |
-
"province": province_col
|
| 198 |
}
|
| 199 |
}
|
| 200 |
except Exception as e:
|
|
@@ -205,14 +246,11 @@ def analyze_bed_capacity(beds_df):
|
|
| 205 |
"""Analyze bed capacity dynamically."""
|
| 206 |
try:
|
| 207 |
# Find required columns
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
if len(current_cols) == 0 or len(prev_cols) == 0:
|
| 212 |
-
return {"error": f"Missing required columns. Found current: {current_cols.tolist()}, prev: {prev_cols.tolist()}"}
|
| 213 |
|
| 214 |
-
current_col
|
| 215 |
-
|
| 216 |
|
| 217 |
# Ensure derived columns exist
|
| 218 |
if 'bed_change' not in beds_df.columns:
|
|
@@ -225,18 +263,16 @@ def analyze_bed_capacity(beds_df):
|
|
| 225 |
)
|
| 226 |
|
| 227 |
# Filter to Alberta if province column exists
|
| 228 |
-
province_col = beds_df
|
| 229 |
-
if
|
| 230 |
-
province_col = province_col[0]
|
| 231 |
alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 232 |
ab_beds = beds_df[alberta_mask].copy()
|
| 233 |
else:
|
| 234 |
ab_beds = beds_df.copy()
|
| 235 |
|
| 236 |
# Calculate zone-level summaries if zone column exists
|
| 237 |
-
zone_col = beds_df
|
| 238 |
-
if
|
| 239 |
-
zone_col = zone_col[0]
|
| 240 |
zone_summary = ab_beds.groupby(zone_col).agg({
|
| 241 |
current_col: 'sum',
|
| 242 |
prev_col: 'sum',
|
|
@@ -275,8 +311,8 @@ def analyze_bed_capacity(beds_df):
|
|
| 275 |
"columns_used": {
|
| 276 |
"beds_current": current_col,
|
| 277 |
"beds_prev": prev_col,
|
| 278 |
-
"zone": zone_col
|
| 279 |
-
"province": province_col
|
| 280 |
}
|
| 281 |
}
|
| 282 |
except Exception as e:
|
|
@@ -287,32 +323,27 @@ def assess_long_term_capacity(facilities_df, beds_df, zone_name):
|
|
| 287 |
"""Assess long-term care capacity dynamically."""
|
| 288 |
try:
|
| 289 |
# Find relevant columns
|
| 290 |
-
zone_col = facilities_df
|
| 291 |
-
city_col = facilities_df
|
| 292 |
-
type_col = facilities_df
|
| 293 |
|
| 294 |
-
if
|
| 295 |
return {"error": "Facility type column not found"}
|
| 296 |
|
| 297 |
-
type_col = type_col[0]
|
| 298 |
-
|
| 299 |
# Get facilities in the specified zone
|
| 300 |
-
if
|
| 301 |
-
zone_col = zone_col[0]
|
| 302 |
zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
|
| 303 |
else:
|
| 304 |
# If zone column not available, use province
|
| 305 |
-
province_col = facilities_df
|
| 306 |
-
if
|
| 307 |
-
province_col = province_col[0]
|
| 308 |
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 309 |
zone_facilities = facilities_df[alberta_mask].copy()
|
| 310 |
else:
|
| 311 |
zone_facilities = facilities_df.copy()
|
| 312 |
|
| 313 |
# Find major city in zone
|
| 314 |
-
if
|
| 315 |
-
city_col = city_col[0]
|
| 316 |
city_counts = zone_facilities[city_col].value_counts()
|
| 317 |
if len(city_counts) > 0:
|
| 318 |
major_city = city_counts.index[0]
|
|
@@ -355,7 +386,8 @@ def generate_operational_recommendations(analysis_results):
|
|
| 355 |
if 'bed_capacity' in analysis_results:
|
| 356 |
bed_data = analysis_results['bed_capacity']
|
| 357 |
if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
|
| 358 |
-
|
|
|
|
| 359 |
decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
|
| 360 |
if zone and decrease:
|
| 361 |
recommendations.append({
|
|
@@ -399,6 +431,9 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
|
|
| 399 |
"""Format the healthcare analysis response with tables and sections."""
|
| 400 |
response = "# Structured Analysis: Healthcare Scenario\n\n"
|
| 401 |
|
|
|
|
|
|
|
|
|
|
| 402 |
# Data Preparation Section
|
| 403 |
if 'facility_distribution' in results:
|
| 404 |
fd = results['facility_distribution']
|
|
@@ -442,10 +477,14 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
|
|
| 442 |
response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
|
| 443 |
response += "|------|---------------|-----------------|-----------------|----------------|\n"
|
| 444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
for zone_data in bc['zone_summary']:
|
| 446 |
-
zone = zone_data.get(
|
| 447 |
-
current = zone_data.get(
|
| 448 |
-
prev = zone_data.get(
|
| 449 |
change = zone_data.get('bed_change', 'N/A')
|
| 450 |
pct = zone_data.get('percent_change', 'N/A')
|
| 451 |
response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
|
|
@@ -453,19 +492,21 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
|
|
| 453 |
|
| 454 |
if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
|
| 455 |
'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
|
|
|
|
| 456 |
abs_dec = bc['max_absolute_decrease']
|
| 457 |
pct_dec = bc['max_percentage_decrease']
|
| 458 |
-
response += f"**Zone with largest absolute decrease**: {abs_dec.get(
|
| 459 |
-
response += f"**Zone with largest percentage decrease**: {pct_dec.get(
|
| 460 |
|
| 461 |
if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
|
| 462 |
response += "### Facilities with Largest Bed Declines\n\n"
|
| 463 |
response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
|
| 464 |
response += "|----------|------|----------------|-----------|\n"
|
| 465 |
|
|
|
|
| 466 |
for facility in bc['facilities_with_largest_declines']:
|
| 467 |
name = facility.get('facility_name', 'N/A')
|
| 468 |
-
zone = facility.get(
|
| 469 |
teaching = facility.get('teaching_status', 'N/A')
|
| 470 |
change = facility.get('bed_change', 'N/A')
|
| 471 |
response += f"| {name} | {zone} | {teaching} | {change} |\n"
|
|
@@ -494,7 +535,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
|
|
| 494 |
response += f"- {ftype}: {count}\n"
|
| 495 |
response += "\n"
|
| 496 |
|
| 497 |
-
# Recommendations Section
|
| 498 |
response += "## 4. Operational Recommendations\n\n"
|
| 499 |
if recommendations:
|
| 500 |
for rec in recommendations:
|
|
@@ -524,10 +565,14 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
|
|
| 524 |
return response
|
| 525 |
|
| 526 |
def handle_healthcare_scenario(scenario_text, data_registry, history):
|
| 527 |
-
"""Handle healthcare scenarios dynamically."""
|
| 528 |
try:
|
| 529 |
results = {}
|
| 530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
# Dynamically identify relevant files
|
| 532 |
facility_files = data_registry.get_data_by_type('facility_data')
|
| 533 |
bed_files = data_registry.get_data_by_type('bed_data')
|
|
@@ -549,14 +594,15 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
|
|
| 549 |
"bed_files": bed_files
|
| 550 |
})
|
| 551 |
|
| 552 |
-
#
|
| 553 |
if facilities_df is not None:
|
| 554 |
results['facility_distribution'] = analyze_facility_distribution(facilities_df)
|
| 555 |
|
|
|
|
| 556 |
if beds_df is not None:
|
| 557 |
results['bed_capacity'] = analyze_bed_capacity(beds_df)
|
| 558 |
|
| 559 |
-
# Long-term care assessment
|
| 560 |
if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
|
| 561 |
zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
|
| 562 |
if zone_col:
|
|
@@ -568,13 +614,13 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
|
|
| 568 |
worst_zone
|
| 569 |
)
|
| 570 |
|
| 571 |
-
# Generate recommendations
|
| 572 |
recommendations = generate_operational_recommendations(results)
|
| 573 |
|
| 574 |
-
# Generate AI integration discussion
|
| 575 |
ai_integration = generate_ai_integration_discussion(results)
|
| 576 |
|
| 577 |
-
# Format response
|
| 578 |
response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
|
| 579 |
|
| 580 |
return response
|
|
@@ -704,14 +750,14 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
|
|
| 704 |
return history + [(user_msg, ans)], awaiting_answers
|
| 705 |
|
| 706 |
if is_identity_query(safe_in, history):
|
| 707 |
-
ans = "I am an AI analytical system designed to help
|
| 708 |
return history + [(user_msg, ans)], awaiting_answers
|
| 709 |
|
| 710 |
# Initialize data registry and session RAG
|
| 711 |
data_registry = DataRegistry()
|
| 712 |
session_rag = SessionRAG()
|
| 713 |
|
| 714 |
-
# Process uploaded files
|
| 715 |
if uploaded_files_paths:
|
| 716 |
process_healthcare_data(uploaded_files_paths, data_registry)
|
| 717 |
|
|
@@ -728,16 +774,55 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
|
|
| 728 |
df = data_registry.get(file_name)
|
| 729 |
session_rag.csv_columns = list(df.columns)
|
| 730 |
|
| 731 |
-
#
|
| 732 |
if is_healthcare_scenario(safe_in, uploaded_files_paths):
|
| 733 |
-
#
|
| 734 |
response = handle_healthcare_scenario(safe_in, data_registry, history)
|
| 735 |
return history + [(user_msg, response)], False
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
|
| 742 |
except Exception as e:
|
| 743 |
err = f"Error: {e}"
|
|
@@ -778,16 +863,16 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
|
|
| 778 |
# --- HERO (initial screen) ---
|
| 779 |
with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap:
|
| 780 |
with gr.Column(elem_id="hero"):
|
| 781 |
-
gr.HTML("<h2>
|
| 782 |
with gr.Row(elem_classes="search-row"):
|
| 783 |
hero_msg = gr.Textbox(
|
| 784 |
-
placeholder="
|
| 785 |
show_label=False,
|
| 786 |
lines=1,
|
| 787 |
elem_classes="hero-box"
|
| 788 |
)
|
| 789 |
hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
|
| 790 |
-
gr.Markdown('<div class="hint">
|
| 791 |
|
| 792 |
# --- MAIN APP (hidden until first message) ---
|
| 793 |
with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
|
|
@@ -802,7 +887,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
|
|
| 802 |
msg = gr.Textbox(
|
| 803 |
label="",
|
| 804 |
show_label=False,
|
| 805 |
-
placeholder="
|
| 806 |
scale=10,
|
| 807 |
elem_id="chat-msg",
|
| 808 |
lines=1,
|
|
|
|
| 1 |
+
# app.py - Complete Dual-Mode Healthcare Analysis System
|
| 2 |
import os, re, json, traceback, pathlib
|
| 3 |
from functools import lru_cache
|
| 4 |
from typing import List, Dict, Any, Tuple, Optional
|
|
|
|
| 73 |
- End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
|
| 74 |
""".strip()
|
| 75 |
|
| 76 |
+
# ---------- Helper Functions ----------
|
| 77 |
+
def find_column(df, patterns):
    """Return the first column label of ``df`` whose name contains any pattern.

    Matching is a case-insensitive substring test. Columns are scanned in
    DataFrame order, so the result is the earliest matching *column*, not the
    column matching the earliest pattern.

    Args:
        df: Object exposing ``.empty`` and ``.columns`` (a pandas DataFrame),
            or None.
        patterns: Iterable of substrings to look for in the column names.

    Returns:
        The original column label (unmodified, so it can be used to index
        ``df``), or None when ``df`` is None/empty or no column matches.
    """
    if df is None or df.empty:
        return None

    # Lower-case the patterns once instead of once per column.
    needles = [str(pattern).lower() for pattern in patterns]

    for col in df.columns:
        # Column labels are not guaranteed to be strings (e.g. an integer
        # year such as 2023); compare on the text form to avoid an
        # AttributeError from calling .lower() on a non-string label.
        haystack = str(col).lower()
        if any(needle in haystack for needle in needles):
            return col
    return None
|
| 85 |
+
|
| 86 |
+
def extract_scenario_tasks(scenario_text):
    """Collect the list items that follow a "Tasks" heading in a scenario.

    Scanning is line based. A line starting with "tasks" (any case) opens the
    task section; a line starting with "operational recommendations" or
    "future integration" closes it. Inside the section, every line that looks
    like a list item is kept: a number followed by "." or ")" (any number,
    not just 1-5), or a bullet ("•", "-", "*").

    Args:
        scenario_text: Free-form scenario description; None or empty text
            yields an empty list.

    Returns:
        list[str]: The stripped task lines, in the order they appear.
    """
    tasks = []
    in_tasks = False

    # Treat a missing scenario as empty text rather than raising.
    for raw_line in (scenario_text or "").split('\n'):
        line = raw_line.strip()
        lowered = line.lower()

        if lowered.startswith('tasks'):
            in_tasks = True
            continue
        if not in_tasks:
            continue

        if lowered.startswith(('operational recommendations', 'future integration')):
            in_tasks = False
            continue

        # Accept any numbered item ("6.", "10.", "3)"), not only "1."-"5.".
        is_numbered = re.match(r'\d+[.)]', line) is not None
        is_bullet = line.startswith(('•', '-', '*'))
        if is_numbered or is_bullet:
            tasks.append(line)

    return tasks
|
| 103 |
+
|
| 104 |
+
# ---------- Session RAG Class ----------
|
| 105 |
class SessionRAG:
|
| 106 |
def __init__(self):
|
| 107 |
self.docs = []
|
|
|
|
| 162 |
return (has_healthcare_keywords or has_facility_types or has_healthcare_tasks) and \
|
| 163 |
(has_healthcare_files or has_scenario_structure)
|
| 164 |
|
| 165 |
+
def is_general_conversation(text: str, uploaded_files_paths) -> bool:
    """Tell whether a message is plain chat rather than a scenario request.

    Args:
        text: The user's message.
        uploaded_files_paths: Paths of uploaded files; any truthy value means
            the message is treated as a scenario, not general conversation.

    Returns:
        False when files were uploaded or the lower-cased message contains
        one of the scenario keywords (substring match); True otherwise.
    """
    # Uploaded files always push the message into scenario handling.
    if uploaded_files_paths:
        return False

    lowered = text.lower()
    scenario_keywords = (
        "scenario", "analyze", "analysis", "assess", "evaluate", "recommend",
        "tasks", "background", "situation", "dataset", "data",
    )

    # A single keyword hit is enough to rule out general conversation.
    for keyword in scenario_keywords:
        if keyword in lowered:
            return False
    return True
|
| 180 |
+
|
| 181 |
def process_healthcare_data(uploaded_files_paths, data_registry):
|
| 182 |
"""Process healthcare data files with robust error handling."""
|
| 183 |
for file_path in uploaded_files_paths:
|
|
|
|
| 196 |
def analyze_facility_distribution(facilities_df):
|
| 197 |
"""Analyze healthcare facility distribution dynamically."""
|
| 198 |
try:
|
| 199 |
+
# Find province column
|
| 200 |
+
province_col = find_column(facilities_df, ['province', 'state', 'territory'])
|
| 201 |
+
if province_col:
|
|
|
|
| 202 |
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 203 |
ab_facilities = facilities_df[alberta_mask].copy()
|
| 204 |
else:
|
| 205 |
ab_facilities = facilities_df.copy()
|
| 206 |
|
| 207 |
# Find facility type column
|
| 208 |
+
type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
|
| 209 |
+
if not type_col:
|
| 210 |
return {"error": "Facility type column not found"}
|
|
|
|
| 211 |
|
| 212 |
# Facility type frequency
|
| 213 |
type_counts = ab_facilities[type_col].value_counts().to_dict()
|
| 214 |
|
| 215 |
+
# Find city column
|
| 216 |
+
city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
|
| 217 |
+
if city_col:
|
|
|
|
| 218 |
city_counts = ab_facilities[city_col].value_counts().head(5)
|
| 219 |
top_cities = city_counts.index.tolist()
|
| 220 |
|
|
|
|
| 234 |
"city_breakdown": city_breakdown,
|
| 235 |
"columns_used": {
|
| 236 |
"facility_type": type_col,
|
| 237 |
+
"city": city_col,
|
| 238 |
+
"province": province_col
|
| 239 |
}
|
| 240 |
}
|
| 241 |
except Exception as e:
|
|
|
|
| 246 |
"""Analyze bed capacity dynamically."""
|
| 247 |
try:
|
| 248 |
# Find required columns
|
| 249 |
+
current_col = find_column(beds_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
|
| 250 |
+
prev_col = find_column(beds_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
if not current_col or not prev_col:
|
| 253 |
+
return {"error": f"Missing required columns. Found current: {current_col}, prev: {prev_col}"}
|
| 254 |
|
| 255 |
# Ensure derived columns exist
|
| 256 |
if 'bed_change' not in beds_df.columns:
|
|
|
|
| 263 |
)
|
| 264 |
|
| 265 |
# Filter to Alberta if province column exists
|
| 266 |
+
province_col = find_column(beds_df, ['province', 'state', 'territory'])
|
| 267 |
+
if province_col:
|
|
|
|
| 268 |
alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 269 |
ab_beds = beds_df[alberta_mask].copy()
|
| 270 |
else:
|
| 271 |
ab_beds = beds_df.copy()
|
| 272 |
|
| 273 |
# Calculate zone-level summaries if zone column exists
|
| 274 |
+
zone_col = find_column(beds_df, ['zone', 'region', 'area', 'district'])
|
| 275 |
+
if zone_col:
|
|
|
|
| 276 |
zone_summary = ab_beds.groupby(zone_col).agg({
|
| 277 |
current_col: 'sum',
|
| 278 |
prev_col: 'sum',
|
|
|
|
| 311 |
"columns_used": {
|
| 312 |
"beds_current": current_col,
|
| 313 |
"beds_prev": prev_col,
|
| 314 |
+
"zone": zone_col,
|
| 315 |
+
"province": province_col
|
| 316 |
}
|
| 317 |
}
|
| 318 |
except Exception as e:
|
|
|
|
| 323 |
"""Assess long-term care capacity dynamically."""
|
| 324 |
try:
|
| 325 |
# Find relevant columns
|
| 326 |
+
zone_col = find_column(facilities_df, ['zone', 'region', 'area', 'district'])
|
| 327 |
+
city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
|
| 328 |
+
type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
|
| 329 |
|
| 330 |
+
if not type_col:
|
| 331 |
return {"error": "Facility type column not found"}
|
| 332 |
|
|
|
|
|
|
|
| 333 |
# Get facilities in the specified zone
|
| 334 |
+
if zone_col:
|
|
|
|
| 335 |
zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
|
| 336 |
else:
|
| 337 |
# If zone column not available, use province
|
| 338 |
+
province_col = find_column(facilities_df, ['province', 'state', 'territory'])
|
| 339 |
+
if province_col:
|
|
|
|
| 340 |
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 341 |
zone_facilities = facilities_df[alberta_mask].copy()
|
| 342 |
else:
|
| 343 |
zone_facilities = facilities_df.copy()
|
| 344 |
|
| 345 |
# Find major city in zone
|
| 346 |
+
if city_col:
|
|
|
|
| 347 |
city_counts = zone_facilities[city_col].value_counts()
|
| 348 |
if len(city_counts) > 0:
|
| 349 |
major_city = city_counts.index[0]
|
|
|
|
| 386 |
if 'bed_capacity' in analysis_results:
|
| 387 |
bed_data = analysis_results['bed_capacity']
|
| 388 |
if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
|
| 389 |
+
zone_col = bed_data.get('columns_used', {}).get('zone')
|
| 390 |
+
zone = bed_data['max_percentage_decrease'].get(zone_col, '') if zone_col else ''
|
| 391 |
decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
|
| 392 |
if zone and decrease:
|
| 393 |
recommendations.append({
|
|
|
|
| 431 |
"""Format the healthcare analysis response with tables and sections."""
|
| 432 |
response = "# Structured Analysis: Healthcare Scenario\n\n"
|
| 433 |
|
| 434 |
+
# Extract tasks from scenario to ensure we address all requirements
|
| 435 |
+
tasks = extract_scenario_tasks(scenario_text)
|
| 436 |
+
|
| 437 |
# Data Preparation Section
|
| 438 |
if 'facility_distribution' in results:
|
| 439 |
fd = results['facility_distribution']
|
|
|
|
| 477 |
response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
|
| 478 |
response += "|------|---------------|-----------------|-----------------|----------------|\n"
|
| 479 |
|
| 480 |
+
zone_col = bc.get('columns_used', {}).get('zone')
|
| 481 |
+
current_col = bc.get('columns_used', {}).get('beds_current')
|
| 482 |
+
prev_col = bc.get('columns_used', {}).get('beds_prev')
|
| 483 |
+
|
| 484 |
for zone_data in bc['zone_summary']:
|
| 485 |
+
zone = zone_data.get(zone_col, 'N/A') if zone_col else 'N/A'
|
| 486 |
+
current = zone_data.get(current_col, 'N/A') if current_col else 'N/A'
|
| 487 |
+
prev = zone_data.get(prev_col, 'N/A') if prev_col else 'N/A'
|
| 488 |
change = zone_data.get('bed_change', 'N/A')
|
| 489 |
pct = zone_data.get('percent_change', 'N/A')
|
| 490 |
response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
|
|
|
|
| 492 |
|
| 493 |
if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
|
| 494 |
'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
|
| 495 |
+
zone_col = bc.get('columns_used', {}).get('zone')
|
| 496 |
abs_dec = bc['max_absolute_decrease']
|
| 497 |
pct_dec = bc['max_percentage_decrease']
|
| 498 |
+
response += f"**Zone with largest absolute decrease**: {abs_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
|
| 499 |
+
response += f"**Zone with largest percentage decrease**: {pct_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
|
| 500 |
|
| 501 |
if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
|
| 502 |
response += "### Facilities with Largest Bed Declines\n\n"
|
| 503 |
response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
|
| 504 |
response += "|----------|------|----------------|-----------|\n"
|
| 505 |
|
| 506 |
+
zone_col = bc.get('columns_used', {}).get('zone')
|
| 507 |
for facility in bc['facilities_with_largest_declines']:
|
| 508 |
name = facility.get('facility_name', 'N/A')
|
| 509 |
+
zone = facility.get(zone_col, 'N/A') if zone_col else 'N/A'
|
| 510 |
teaching = facility.get('teaching_status', 'N/A')
|
| 511 |
change = facility.get('bed_change', 'N/A')
|
| 512 |
response += f"| {name} | {zone} | {teaching} | {change} |\n"
|
|
|
|
| 535 |
response += f"- {ftype}: {count}\n"
|
| 536 |
response += "\n"
|
| 537 |
|
| 538 |
+
# Operational Recommendations Section
|
| 539 |
response += "## 4. Operational Recommendations\n\n"
|
| 540 |
if recommendations:
|
| 541 |
for rec in recommendations:
|
|
|
|
| 565 |
return response
|
| 566 |
|
| 567 |
def handle_healthcare_scenario(scenario_text, data_registry, history):
|
| 568 |
+
"""Handle healthcare scenarios dynamically with explicit task following."""
|
| 569 |
try:
|
| 570 |
results = {}
|
| 571 |
|
| 572 |
+
# Extract tasks from scenario to ensure we address all requirements
|
| 573 |
+
tasks = extract_scenario_tasks(scenario_text)
|
| 574 |
+
print(f"Extracted tasks: {tasks}")
|
| 575 |
+
|
| 576 |
# Dynamically identify relevant files
|
| 577 |
facility_files = data_registry.get_data_by_type('facility_data')
|
| 578 |
bed_files = data_registry.get_data_by_type('bed_data')
|
|
|
|
| 594 |
"bed_files": bed_files
|
| 595 |
})
|
| 596 |
|
| 597 |
+
# Task 1: Data preparation (facility distribution)
|
| 598 |
if facilities_df is not None:
|
| 599 |
results['facility_distribution'] = analyze_facility_distribution(facilities_df)
|
| 600 |
|
| 601 |
+
# Task 2: Bed capacity analysis
|
| 602 |
if beds_df is not None:
|
| 603 |
results['bed_capacity'] = analyze_bed_capacity(beds_df)
|
| 604 |
|
| 605 |
+
# Task 3: Long-term care capacity assessment
|
| 606 |
if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
|
| 607 |
zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
|
| 608 |
if zone_col:
|
|
|
|
| 614 |
worst_zone
|
| 615 |
)
|
| 616 |
|
| 617 |
+
# Generate operational recommendations (Task 4.1)
|
| 618 |
recommendations = generate_operational_recommendations(results)
|
| 619 |
|
| 620 |
+
# Generate AI integration discussion (Task 4.2)
|
| 621 |
ai_integration = generate_ai_integration_discussion(results)
|
| 622 |
|
| 623 |
+
# Format response ensuring all tasks are addressed
|
| 624 |
response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
|
| 625 |
|
| 626 |
return response
|
|
|
|
| 750 |
return history + [(user_msg, ans)], awaiting_answers
|
| 751 |
|
| 752 |
if is_identity_query(safe_in, history):
|
| 753 |
+
ans = "I am an AI analytical system designed to help with both general conversations and healthcare scenario analysis. I can answer your questions and also analyze healthcare data when you upload files and describe a scenario."
|
| 754 |
return history + [(user_msg, ans)], awaiting_answers
|
| 755 |
|
| 756 |
# Initialize data registry and session RAG
|
| 757 |
data_registry = DataRegistry()
|
| 758 |
session_rag = SessionRAG()
|
| 759 |
|
| 760 |
+
# Process uploaded files if any
|
| 761 |
if uploaded_files_paths:
|
| 762 |
process_healthcare_data(uploaded_files_paths, data_registry)
|
| 763 |
|
|
|
|
| 774 |
df = data_registry.get(file_name)
|
| 775 |
session_rag.csv_columns = list(df.columns)
|
| 776 |
|
| 777 |
+
# Determine the mode: healthcare scenario or general conversation
|
| 778 |
if is_healthcare_scenario(safe_in, uploaded_files_paths):
|
| 779 |
+
# Healthcare scenario mode
|
| 780 |
response = handle_healthcare_scenario(safe_in, data_registry, history)
|
| 781 |
return history + [(user_msg, response)], False
|
| 782 |
+
else:
|
| 783 |
+
# General conversation mode
|
| 784 |
+
# Try Cohere first if available
|
| 785 |
+
if USE_HOSTED_COHERE:
|
| 786 |
+
out = cohere_chat(safe_in, history)
|
| 787 |
+
if out:
|
| 788 |
+
out = _sanitize_text(out)
|
| 789 |
+
safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
|
| 790 |
+
if blocked_out:
|
| 791 |
+
safe_out = refusal_reply(reason_out)
|
| 792 |
+
log_event("assistant_reply", None, {
|
| 793 |
+
**hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
|
| 794 |
+
**hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
|
| 795 |
+
"mode": "general_cohere",
|
| 796 |
+
})
|
| 797 |
+
return history + [(user_msg, safe_out)], False
|
| 798 |
+
|
| 799 |
+
# Fall back to local model
|
| 800 |
+
try:
|
| 801 |
+
model, tokenizer = load_local_model()
|
| 802 |
+
inputs = build_inputs(tokenizer, safe_in, history)
|
| 803 |
+
out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
|
| 804 |
+
|
| 805 |
+
if isinstance(out, str):
|
| 806 |
+
for tag in ("Assistant:", "System:", "User:"):
|
| 807 |
+
if out.startswith(tag):
|
| 808 |
+
out = out[len(tag):].strip()
|
| 809 |
+
|
| 810 |
+
out = _sanitize_text(out or "")
|
| 811 |
+
safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
|
| 812 |
+
if blocked_out:
|
| 813 |
+
safe_out = refusal_reply(reason_out)
|
| 814 |
+
|
| 815 |
+
log_event("assistant_reply", None, {
|
| 816 |
+
**hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
|
| 817 |
+
**hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
|
| 818 |
+
"mode": "general_local",
|
| 819 |
+
})
|
| 820 |
+
|
| 821 |
+
return history + [(user_msg, safe_out)], False
|
| 822 |
+
except Exception as e:
|
| 823 |
+
err = f"Error generating response: {str(e)}"
|
| 824 |
+
log_event("model_error", None, {"error": str(e)})
|
| 825 |
+
return history + [(user_msg, err)], False
|
| 826 |
|
| 827 |
except Exception as e:
|
| 828 |
err = f"Error: {e}"
|
|
|
|
| 863 |
# --- HERO (initial screen) ---
|
| 864 |
with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap:
|
| 865 |
with gr.Column(elem_id="hero"):
|
| 866 |
+
gr.HTML("<h2>How can I help you today?</h2>")
|
| 867 |
with gr.Row(elem_classes="search-row"):
|
| 868 |
hero_msg = gr.Textbox(
|
| 869 |
+
placeholder="Ask me anything or upload healthcare data files for scenario analysis…",
|
| 870 |
show_label=False,
|
| 871 |
lines=1,
|
| 872 |
elem_classes="hero-box"
|
| 873 |
)
|
| 874 |
hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
|
| 875 |
+
gr.Markdown('<div class="hint">I can help with general questions or analyze healthcare scenarios when you upload data files and describe your analysis needs.</div>')
|
| 876 |
|
| 877 |
# --- MAIN APP (hidden until first message) ---
|
| 878 |
with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
|
|
|
|
| 887 |
msg = gr.Textbox(
|
| 888 |
label="",
|
| 889 |
show_label=False,
|
| 890 |
+
placeholder="Ask me anything or continue your healthcare scenario analysis…",
|
| 891 |
scale=10,
|
| 892 |
elem_id="chat-msg",
|
| 893 |
lines=1,
|