Rajan Sharma committed on
Commit
c1ff5e2
·
verified ·
1 Parent(s): edf0adb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -177
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py - Enhanced Healthcare Scenario Analysis System
2
  import os, re, json, traceback, pathlib
3
  from functools import lru_cache
4
  from typing import List, Dict, Any, Tuple, Optional
@@ -10,9 +10,11 @@ import torch
10
  import regex as re2
11
 
12
  # Import necessary modules
13
- from settings import SNAPSHOT_PATH, PERSIST_CONTENT
14
  from audit_log import log_event, hash_summary
15
  from privacy import redact_text, safety_filter, refusal_reply
 
 
16
 
17
  # ---------- Writable caches (HF Spaces-safe) ----------
18
  HOME = pathlib.Path.home()
@@ -48,26 +50,12 @@ except Exception:
48
  from transformers import AutoTokenizer, AutoModelForCausalLM
49
  from huggingface_hub import login
50
 
51
- # ---------- Healthcare-specific constants ----------
52
- HEALTHCARE_KEYWORDS = [
53
- "hospital", "patient", "bed", "care", "health", "medical", "clinical",
54
- "facility", "nursing", "residential", "ambulatory", "healthcare", "occupancy",
55
- "capacity", "staff", "zone", "province", "alberta", "cihi", "odhf",
56
- "respiratory", "virus", "flu", "surge", "acute", "long-term", "ltc"
57
- ]
58
-
59
- HEALTHCARE_FACILITY_TYPES = {
60
- "Hospitals": ["hospital", "medical center", "health centre"],
61
- "Nursing and residential care facilities": ["nursing", "residential", "care facility", "long-term care"],
62
- "Ambulatory health care services": ["ambulatory", "clinic", "surgery center", "outpatient"]
63
- }
64
-
65
  # ---------- Config ----------
66
  MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
67
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
68
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
69
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
70
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
71
 
72
  # ---------- Generic System Prompt ----------
73
  SYSTEM_MASTER = """
@@ -85,53 +73,6 @@ Formatting rules for structured analysis:
85
  - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
86
  """.strip()
87
 
88
- # ---------- Data Registry Class ----------
89
- class DataRegistry:
90
- def __init__(self):
91
- self.data = {}
92
- self.file_metadata = {}
93
-
94
- def add_path(self, path):
95
- try:
96
- file_name = os.path.basename(path)
97
- if file_name.endswith('.csv'):
98
- df = pd.read_csv(path)
99
- self.data[file_name] = df
100
- self.file_metadata[file_name] = {
101
- 'type': 'csv',
102
- 'columns': list(df.columns),
103
- 'shape': df.shape,
104
- 'sample': df.head(3).to_dict('records')
105
- }
106
- return True
107
- except Exception as e:
108
- print(f"Error adding {path}: {e}")
109
- return False
110
-
111
- def names(self):
112
- return list(self.data.keys())
113
-
114
- def get(self, name):
115
- return self.data.get(name)
116
-
117
- def summarize_for_prompt(self):
118
- if not self.data:
119
- return "No data files registered."
120
-
121
- summary = []
122
- for name, meta in self.file_metadata.items():
123
- summary.append(f"File: {name}")
124
- summary.append(f"Type: {meta['type']}")
125
- summary.append(f"Columns: {', '.join(meta['columns'])}")
126
- summary.append(f"Shape: {meta['shape']}")
127
- summary.append("")
128
-
129
- return "\n".join(summary)
130
-
131
- def clear(self):
132
- self.data.clear()
133
- self.file_metadata.clear()
134
-
135
  # ---------- Session RAG Class (Simplified) ----------
136
  class SessionRAG:
137
  def __init__(self):
@@ -162,12 +103,13 @@ def is_healthcare_scenario(text: str, uploaded_files_paths) -> bool:
162
  t = (text or "").lower()
163
 
164
  # Check for healthcare keywords
165
- has_healthcare_keywords = any(keyword in t for keyword in HEALTHCARE_KEYWORDS)
166
 
167
  # Check for healthcare facility types
168
  has_facility_types = any(
169
- any(ftype in t for ftype in types)
170
- for types in HEALTHCARE_FACILITY_TYPES.values()
 
171
  )
172
 
173
  # Check for healthcare-specific tasks
@@ -196,25 +138,10 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
196
  """Process healthcare data files with robust error handling."""
197
  for file_path in uploaded_files_paths:
198
  try:
199
- file_name = os.path.basename(file_path).lower()
200
-
201
- if file_name.endswith('.csv'):
202
- df = pd.read_csv(file_path)
203
-
204
- # Standardize column names
205
- df.columns = [col.strip().lower().replace(' ', '_').replace('-', '_') for col in df.columns]
206
-
207
- # Handle healthcare-specific data structures
208
- if 'facility_name' in df.columns:
209
- if 'facility_type' not in df.columns and 'odhf_facility_type' in df.columns:
210
- df['facility_type'] = df['odhf_facility_type']
211
-
212
- if 'beds_current' in df.columns and 'beds_prev' in df.columns:
213
- df['bed_change'] = df['beds_current'] - df['beds_prev']
214
- df['percent_change'] = (df['bed_change'] / df['beds_prev']) * 100
215
-
216
- data_registry.add_path(file_path)
217
-
218
  except Exception as e:
219
  print(f"Error processing {file_path}: {e}")
220
  log_event("data_processing_error", None, {
@@ -223,27 +150,38 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
223
  })
224
 
225
  def analyze_facility_distribution(facilities_df):
226
- """Analyze healthcare facility distribution by type and location."""
227
  try:
228
  # Filter to Alberta if province column exists
229
- if 'province' in facilities_df.columns:
230
- ab_facilities = facilities_df[facilities_df['province'] == 'ab']
 
 
 
231
  else:
232
- ab_facilities = facilities_df
 
 
 
 
 
 
233
 
234
  # Facility type frequency
235
- type_counts = ab_facilities['facility_type'].value_counts().to_dict()
236
 
237
  # Top cities by facility count
238
- if 'city' in ab_facilities.columns:
239
- city_counts = ab_facilities['city'].value_counts().head(5)
 
 
240
  top_cities = city_counts.index.tolist()
241
 
242
  # Breakdown by facility type for top cities
243
  city_breakdown = {}
244
  for city in top_cities:
245
- city_data = ab_facilities[ab_facilities['city'] == city]
246
- city_breakdown[city] = city_data['facility_type'].value_counts().to_dict()
247
  else:
248
  top_cities = []
249
  city_breakdown = {}
@@ -252,35 +190,74 @@ def analyze_facility_distribution(facilities_df):
252
  "total_facilities": len(ab_facilities),
253
  "type_distribution": type_counts,
254
  "top_cities": top_cities,
255
- "city_breakdown": city_breakdown
 
 
 
 
 
256
  }
257
  except Exception as e:
258
  log_event("facility_analysis_error", None, {"error": str(e)})
259
  return {"error": str(e)}
260
 
261
  def analyze_bed_capacity(beds_df):
262
- """Analyze bed capacity by zone and identify trends."""
263
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  # Filter to Alberta if province column exists
265
- if 'province' in beds_df.columns:
266
- ab_beds = beds_df[beds_df['province'] == 'alberta']
 
 
 
267
  else:
268
- ab_beds = beds_df
269
 
270
- # Calculate zone-level summaries
271
- if 'zone' in ab_beds.columns:
272
- zone_summary = ab_beds.groupby('zone').agg({
273
- 'beds_current': 'sum',
274
- 'beds_prev': 'sum',
 
 
275
  'bed_change': 'sum'
276
  }).reset_index()
277
 
278
- # Calculate percentage change
279
- zone_summary['percent_change'] = (zone_summary['bed_change'] / zone_summary['beds_prev']) * 100
 
 
280
 
281
  # Find zones with largest changes
282
- max_abs_decrease = zone_summary.loc[zone_summary['bed_change'].idxmin()]
283
- max_pct_decrease = zone_summary.loc[zone_summary['percent_change'].idxmin()]
 
 
 
 
 
 
 
284
 
285
  # Identify facilities with largest declines
286
  facilities_decline = ab_beds.sort_values('bed_change').head(5)
@@ -291,39 +268,62 @@ def analyze_bed_capacity(beds_df):
291
  facilities_decline = pd.DataFrame()
292
 
293
  return {
294
- "zone_summary": zone_summary.to_dict('records'),
295
- "max_absolute_decrease": max_abs_decrease.to_dict(),
296
- "max_percentage_decrease": max_pct_decrease.to_dict(),
297
- "facilities_with_largest_declines": facilities_decline.to_dict('records')
 
 
 
 
 
 
298
  }
299
  except Exception as e:
300
  log_event("bed_analysis_error", None, {"error": str(e)})
301
  return {"error": str(e)}
302
 
303
  def assess_long_term_capacity(facilities_df, beds_df, zone_name):
304
- """Assess long-term care capacity in a specific zone."""
305
  try:
 
 
 
 
 
 
 
 
 
 
306
  # Get facilities in the specified zone
307
- if 'zone' in facilities_df.columns:
308
- zone_facilities = facilities_df[facilities_df['zone'] == zone_name]
 
309
  else:
310
  # If zone column not available, use province
311
- zone_facilities = facilities_df[facilities_df['province'] == 'ab']
 
 
 
 
 
 
312
 
313
  # Find major city in zone
314
- if 'city' in zone_facilities.columns:
315
- city_counts = zone_facilities['city'].value_counts()
316
- major_city = city_counts.index[0] if len(city_counts) > 0 else None
317
-
318
- if major_city:
319
- city_facilities = zone_facilities[zone_facilities['city'] == major_city]
320
 
321
  # Count facility types
322
- facility_counts = city_facilities['facility_type'].value_counts().to_dict()
323
 
324
  # Calculate ratio of nursing/residential to hospitals
325
- hospitals = facility_counts.get('Hospitals', 0)
326
- nursing = facility_counts.get('Nursing and residential care facilities', 0)
327
  ratio = nursing / hospitals if hospitals > 0 else 0
328
 
329
  # Assess capacity
@@ -334,7 +334,12 @@ def assess_long_term_capacity(facilities_df, beds_df, zone_name):
334
  "major_city": major_city,
335
  "facility_counts": facility_counts,
336
  "nursing_to_hospital_ratio": ratio,
337
- "capacity_assessment": capacity_assessment
 
 
 
 
 
338
  }
339
 
340
  return {"error": "Could not determine major city or facility counts"}
@@ -352,22 +357,24 @@ def generate_operational_recommendations(analysis_results):
352
  if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
353
  zone = bed_data['max_percentage_decrease'].get('zone', '')
354
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
355
- recommendations.append({
356
- "title": f"Restore staffed beds in {zone} Zone",
357
- "description": f"Priority should be given to reopening closed units and hiring staff to address the {decrease:.1f}% decrease in bed capacity.",
358
- "data_source": "Bed capacity analysis"
359
- })
 
360
 
361
  # Recommendation 2: Expand long-term care capacity
362
  if 'long_term_care' in analysis_results:
363
  ltc_data = analysis_results['long_term_care']
364
  if ltc_data.get('capacity_assessment') == 'insufficient':
365
  city = ltc_data.get('major_city', '')
366
- recommendations.append({
367
- "title": f"Expand long-term care capacity in {city}",
368
- "description": f"Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.",
369
- "data_source": "Long-term care capacity assessment"
370
- })
 
371
 
372
  # Recommendation 3: Implement surge plans
373
  if 'bed_capacity' in analysis_results:
@@ -400,7 +407,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
400
  response += f"Error in facility distribution analysis: {fd['error']}\n\n"
401
  else:
402
  response += "## 1. Data Preparation\n\n"
403
- response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n"
404
 
405
  if 'type_distribution' in fd:
406
  response += "### Facility Type Distribution\n\n"
@@ -432,13 +439,13 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
432
 
433
  if 'zone_summary' in bc and bc['zone_summary']:
434
  response += "### Bed Capacity by Zone\n\n"
435
- response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n"
436
- response += "|------|---------------|---------------|-----------------|----------------|\n"
437
 
438
  for zone_data in bc['zone_summary']:
439
- zone = zone_data.get('zone', 'N/A')
440
- current = zone_data.get('beds_current', 'N/A')
441
- prev = zone_data.get('beds_prev', 'N/A')
442
  change = zone_data.get('bed_change', 'N/A')
443
  pct = zone_data.get('percent_change', 'N/A')
444
  response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
@@ -448,8 +455,8 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
448
  'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
449
  abs_dec = bc['max_absolute_decrease']
450
  pct_dec = bc['max_percentage_decrease']
451
- response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
452
- response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
453
 
454
  if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
455
  response += "### Facilities with Largest Bed Declines\n\n"
@@ -458,7 +465,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
458
 
459
  for facility in bc['facilities_with_largest_declines']:
460
  name = facility.get('facility_name', 'N/A')
461
- zone = facility.get('zone', 'N/A')
462
  teaching = facility.get('teaching_status', 'N/A')
463
  change = facility.get('bed_change', 'N/A')
464
  response += f"| {name} | {zone} | {teaching} | {change} |\n"
@@ -511,61 +518,63 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
511
  response += "## Provenance\n\n"
512
  response += "This analysis is based on:\n"
513
  response += "- Scenario description provided by the user\n"
514
- response += "- Uploaded data files: all_health_facilities.csv and clean_beds_data.csv\n"
515
  response += "- Calculations performed on the provided data\n"
516
 
517
  return response
518
 
519
  def handle_healthcare_scenario(scenario_text, data_registry, history):
520
- """Handle healthcare-specific scenario analysis."""
521
  try:
522
- # Initialize analysis results
523
  results = {}
524
 
525
- # Task 1: Data preparation
 
 
 
 
526
  facilities_df = None
527
- beds_df = None
 
528
 
529
- # Find the relevant data files
530
- for file_name in data_registry.names():
531
- df = data_registry.get(file_name)
532
- if df is not None:
533
- if 'facility' in file_name.lower() or 'health' in file_name.lower():
534
- facilities_df = df
535
- elif 'bed' in file_name.lower():
536
- beds_df = df
537
 
538
  # Log what we found
539
  log_event("data_files_found", None, {
540
  "facilities": facilities_df is not None,
541
  "beds": beds_df is not None,
542
- "files": data_registry.names()
 
543
  })
544
 
 
545
  if facilities_df is not None:
546
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
547
 
548
- # Task 2: Bed capacity analysis
549
  if beds_df is not None:
550
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
551
 
552
- # Task 3: Long-term care capacity assessment
553
  if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
554
- worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '')
555
- if worst_zone and facilities_df is not None:
556
- results['long_term_care'] = assess_long_term_capacity(
557
- facilities_df,
558
- beds_df,
559
- worst_zone
560
- )
 
 
561
 
562
- # Generate operational recommendations
563
  recommendations = generate_operational_recommendations(results)
564
 
565
- # Generate future AI integration discussion
566
  ai_integration = generate_ai_integration_discussion(results)
567
 
568
- # Compile final response
569
  response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
570
 
571
  return response
@@ -650,7 +659,7 @@ def cohere_chat(message, history):
650
  resp = client.chat(
651
  model="command-r7b-12-2024",
652
  message=prompt,
653
- temperature=0.3,
654
  max_tokens=MAX_NEW_TOKENS,
655
  )
656
  if hasattr(resp, "text") and resp.text: return resp.text.strip()
@@ -675,8 +684,9 @@ def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
675
  with torch.no_grad():
676
  out = model.generate(
677
  input_ids=input_ids, max_new_tokens=max_new_tokens,
678
- do_sample=True, temperature=0.3, top_p=0.9,
679
- repetition_penalty=1.15,
 
680
  pad_token_id=tokenizer.eos_token_id,
681
  eos_token_id=tokenizer.eos_token_id,
682
  )
@@ -705,6 +715,13 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
705
  if uploaded_files_paths:
706
  process_healthcare_data(uploaded_files_paths, data_registry)
707
 
 
 
 
 
 
 
 
708
  # Update session RAG with CSV columns
709
  for file_name in data_registry.names():
710
  if file_name.endswith('.csv'):
@@ -770,7 +787,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
770
  elem_classes="hero-box"
771
  )
772
  hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
773
- gr.Markdown('<div class="hint">Upload healthcare data files (CSV, PDF, etc.) and describe your scenario for comprehensive analysis.</div>')
774
 
775
  # --- MAIN APP (hidden until first message) ---
776
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
@@ -778,7 +795,8 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
778
  with gr.Row():
779
  uploads = gr.Files(
780
  label="Upload healthcare data files",
781
- file_types=["file"], file_count="multiple", height=68
 
782
  )
783
  with gr.Row(elem_id="chat-input-row"):
784
  msg = gr.Textbox(
 
1
+ # app.py - Complete Dynamic Healthcare Scenario Analysis System
2
  import os, re, json, traceback, pathlib
3
  from functools import lru_cache
4
  from typing import List, Dict, Any, Tuple, Optional
 
10
  import regex as re2
11
 
12
  # Import necessary modules
13
+ from settings import SNAPSHOT_PATH, PERSIST_CONTENT, HEALTHCARE_SETTINGS, MODEL_SETTINGS
14
  from audit_log import log_event, hash_summary
15
  from privacy import redact_text, safety_filter, refusal_reply
16
+ from data_registry import DataRegistry
17
+ from upload_ingest import extract_text_from_files
18
 
19
  # ---------- Writable caches (HF Spaces-safe) ----------
20
  HOME = pathlib.Path.home()
 
50
  from transformers import AutoTokenizer, AutoModelForCausalLM
51
  from huggingface_hub import login
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # ---------- Config ----------
54
  MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
55
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
56
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
57
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
58
+ MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", MODEL_SETTINGS.get("max_new_tokens", 2048)))
59
 
60
  # ---------- Generic System Prompt ----------
61
  SYSTEM_MASTER = """
 
73
  - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
74
  """.strip()
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # ---------- Session RAG Class (Simplified) ----------
77
  class SessionRAG:
78
  def __init__(self):
 
103
  t = (text or "").lower()
104
 
105
  # Check for healthcare keywords
106
+ has_healthcare_keywords = any(keyword in t for keyword in HEALTHCARE_SETTINGS["healthcare_keywords"])
107
 
108
  # Check for healthcare facility types
109
# BUG FIX: the previous code wrapped a single boolean expression in any(),
# i.e. any(any(...) or any(...) or any(...)). any() over a bool raises
# TypeError ("'bool' object is not iterable"). Combine the checks with `or`.
has_facility_types = (
    any(ftype in t for ftype in ["hospital", "medical center", "health centre"]) or
    any(ftype in t for ftype in ["nursing", "residential", "care facility", "long-term care"]) or
    any(ftype in t for ftype in ["ambulatory", "clinic", "surgery center", "outpatient"])
)
114
 
115
  # Check for healthcare-specific tasks
 
138
  """Process healthcare data files with robust error handling."""
139
  for file_path in uploaded_files_paths:
140
  try:
141
+ if data_registry.add_path(file_path):
142
+ print(f"Successfully processed: {file_path}")
143
+ else:
144
+ print(f"Failed to process: {file_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  except Exception as e:
146
  print(f"Error processing {file_path}: {e}")
147
  log_event("data_processing_error", None, {
 
150
  })
151
 
152
  def analyze_facility_distribution(facilities_df):
153
+ """Analyze healthcare facility distribution dynamically."""
154
  try:
155
  # Filter to Alberta if province column exists
156
+ province_col = facilities_df.columns[facilities_df.columns.str.contains('province', case=False)]
157
+ if len(province_col) > 0:
158
+ province_col = province_col[0]
159
+ alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
160
+ ab_facilities = facilities_df[alberta_mask].copy()
161
  else:
162
+ ab_facilities = facilities_df.copy()
163
+
164
+ # Find facility type column
165
+ type_col = facilities_df.columns[facilities_df.columns.str.contains('type', case=False)]
166
+ if len(type_col) == 0:
167
+ return {"error": "Facility type column not found"}
168
+ type_col = type_col[0]
169
 
170
  # Facility type frequency
171
+ type_counts = ab_facilities[type_col].value_counts().to_dict()
172
 
173
  # Top cities by facility count
174
+ city_col = facilities_df.columns[facilities_df.columns.str.contains('city', case=False)]
175
+ if len(city_col) > 0:
176
+ city_col = city_col[0]
177
+ city_counts = ab_facilities[city_col].value_counts().head(5)
178
  top_cities = city_counts.index.tolist()
179
 
180
  # Breakdown by facility type for top cities
181
  city_breakdown = {}
182
  for city in top_cities:
183
+ city_data = ab_facilities[ab_facilities[city_col] == city]
184
+ city_breakdown[city] = city_data[type_col].value_counts().to_dict()
185
  else:
186
  top_cities = []
187
  city_breakdown = {}
 
190
  "total_facilities": len(ab_facilities),
191
  "type_distribution": type_counts,
192
  "top_cities": top_cities,
193
+ "city_breakdown": city_breakdown,
194
+ "columns_used": {
195
+ "facility_type": type_col,
196
+ "city": city_col[0] if len(city_col) > 0 else None,
197
+ "province": province_col[0] if len(province_col) > 0 else None
198
+ }
199
  }
200
  except Exception as e:
201
  log_event("facility_analysis_error", None, {"error": str(e)})
202
  return {"error": str(e)}
203
 
204
def analyze_bed_capacity(beds_df):
    """Analyze staffed-bed capacity changes, optionally grouped by zone.

    Discovers the current/previous bed-count columns by name pattern,
    derives absolute and percentage change (guarding against division by
    zero), filters to Alberta when a province column exists, and produces
    zone-level summaries when a zone/region/area column exists.

    Args:
        beds_df: DataFrame of per-facility bed counts. NOTE: mutated in
            place to add 'bed_change' / 'percent_change' when missing
            (same behavior as before).

    Returns:
        dict with zone summary records, zones with the largest absolute and
        percentage declines, the five facilities with the largest declines,
        and the concrete column names used — or ``{"error": ...}`` on failure.
    """
    try:
        # Locate the "current period" and "previous period" bed columns.
        current_cols = beds_df.columns[beds_df.columns.str.contains('current|2023|2024', case=False)]
        prev_cols = beds_df.columns[beds_df.columns.str.contains('prev|2022|previous', case=False)]

        if len(current_cols) == 0 or len(prev_cols) == 0:
            return {"error": f"Missing required columns. Found current: {current_cols.tolist()}, prev: {prev_cols.tolist()}"}

        current_col = current_cols[0]
        prev_col = prev_cols[0]

        # Derive change columns once so later code can rely on them.
        if 'bed_change' not in beds_df.columns:
            beds_df['bed_change'] = beds_df[current_col] - beds_df[prev_col]

        if 'percent_change' not in beds_df.columns:
            beds_df['percent_change'] = beds_df.apply(
                lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
                axis=1
            )

        # Filter to Alberta if a province column exists.
        province_matches = beds_df.columns[beds_df.columns.str.contains('province', case=False)]
        province_col = province_matches[0] if len(province_matches) > 0 else None
        if province_col is not None:
            alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
            ab_beds = beds_df[alberta_mask].copy()
        else:
            ab_beds = beds_df.copy()

        # BUG FIX: these were previously assigned only inside the zone
        # branch, causing a NameError at the return statement whenever no
        # zone-like column existed. Initialize safe defaults up front.
        zone_summary = pd.DataFrame()
        max_abs_decrease = {}
        max_pct_decrease = {}

        # Zone-level summaries when a zone/region/area column exists.
        zone_matches = beds_df.columns[beds_df.columns.str.contains('zone|region|area', case=False)]
        zone_col = zone_matches[0] if len(zone_matches) > 0 else None
        if zone_col is not None:
            zone_summary = ab_beds.groupby(zone_col).agg({
                current_col: 'sum',
                prev_col: 'sum',
                'bed_change': 'sum'
            }).reset_index()

            zone_summary['percent_change'] = zone_summary.apply(
                lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
                axis=1
            )

            # Zones with the largest absolute / percentage declines.
            if len(zone_summary) > 0:
                max_abs_decrease = zone_summary.loc[zone_summary['bed_change'].idxmin()]
                max_pct_decrease = zone_summary.loc[zone_summary['percent_change'].idxmin()]

        # Facilities with the largest declines ('bed_change' is guaranteed above).
        facilities_decline = ab_beds.sort_values('bed_change').head(5)

        return {
            "zone_summary": zone_summary.to_dict('records') if not zone_summary.empty else [],
            "max_absolute_decrease": max_abs_decrease.to_dict() if isinstance(max_abs_decrease, pd.Series) else max_abs_decrease,
            "max_percentage_decrease": max_pct_decrease.to_dict() if isinstance(max_pct_decrease, pd.Series) else max_pct_decrease,
            "facilities_with_largest_declines": facilities_decline.to_dict('records') if not facilities_decline.empty else [],
            # BUG FIX: the previous code rebound zone_col/province_col to the
            # selected column-name *string* and then indexed [0] here,
            # returning the first character of the name instead of the name.
            "columns_used": {
                "beds_current": current_col,
                "beds_prev": prev_col,
                "zone": zone_col,
                "province": province_col
            }
        }
    except Exception as e:
        log_event("bed_analysis_error", None, {"error": str(e)})
        return {"error": str(e)}
285
 
286
  def assess_long_term_capacity(facilities_df, beds_df, zone_name):
287
+ """Assess long-term care capacity dynamically."""
288
  try:
289
+ # Find relevant columns
290
+ zone_col = facilities_df.columns[facilities_df.columns.str.contains('zone|region|area', case=False)]
291
+ city_col = facilities_df.columns[facilities_df.columns.str.contains('city|municipality|town', case=False)]
292
+ type_col = facilities_df.columns[facilities_df.columns.str.contains('type|category|class', case=False)]
293
+
294
+ if len(type_col) == 0:
295
+ return {"error": "Facility type column not found"}
296
+
297
+ type_col = type_col[0]
298
+
299
  # Get facilities in the specified zone
300
+ if len(zone_col) > 0:
301
+ zone_col = zone_col[0]
302
+ zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
303
  else:
304
  # If zone column not available, use province
305
+ province_col = facilities_df.columns[facilities_df.columns.str.contains('province', case=False)]
306
+ if len(province_col) > 0:
307
+ province_col = province_col[0]
308
+ alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
309
+ zone_facilities = facilities_df[alberta_mask].copy()
310
+ else:
311
+ zone_facilities = facilities_df.copy()
312
 
313
  # Find major city in zone
314
+ if len(city_col) > 0:
315
+ city_col = city_col[0]
316
+ city_counts = zone_facilities[city_col].value_counts()
317
+ if len(city_counts) > 0:
318
+ major_city = city_counts.index[0]
319
+ city_facilities = zone_facilities[zone_facilities[city_col] == major_city]
320
 
321
  # Count facility types
322
+ facility_counts = city_facilities[type_col].value_counts().to_dict()
323
 
324
  # Calculate ratio of nursing/residential to hospitals
325
+ hospitals = sum(count for key, count in facility_counts.items() if 'hospital' in key.lower())
326
+ nursing = sum(count for key, count in facility_counts.items() if any(word in key.lower() for word in ['nursing', 'residential', 'care']))
327
  ratio = nursing / hospitals if hospitals > 0 else 0
328
 
329
  # Assess capacity
 
334
  "major_city": major_city,
335
  "facility_counts": facility_counts,
336
  "nursing_to_hospital_ratio": ratio,
337
+ "capacity_assessment": capacity_assessment,
338
+ "columns_used": {
339
+ "zone": zone_col,
340
+ "city": city_col,
341
+ "facility_type": type_col
342
+ }
343
  }
344
 
345
  return {"error": "Could not determine major city or facility counts"}
 
357
  if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
358
  zone = bed_data['max_percentage_decrease'].get('zone', '')
359
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
360
+ if zone and decrease:
361
+ recommendations.append({
362
+ "title": f"Restore staffed beds in {zone} Zone",
363
+ "description": f"Priority should be given to reopening closed units and hiring staff to address the {decrease:.1f}% decrease in bed capacity.",
364
+ "data_source": "Bed capacity analysis"
365
+ })
366
 
367
  # Recommendation 2: Expand long-term care capacity
368
  if 'long_term_care' in analysis_results:
369
  ltc_data = analysis_results['long_term_care']
370
  if ltc_data.get('capacity_assessment') == 'insufficient':
371
  city = ltc_data.get('major_city', '')
372
+ if city:
373
+ recommendations.append({
374
+ "title": f"Expand long-term care capacity in {city}",
375
+ "description": f"Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.",
376
+ "data_source": "Long-term care capacity assessment"
377
+ })
378
 
379
  # Recommendation 3: Implement surge plans
380
  if 'bed_capacity' in analysis_results:
 
407
  response += f"Error in facility distribution analysis: {fd['error']}\n\n"
408
  else:
409
  response += "## 1. Data Preparation\n\n"
410
+ response += f"Total healthcare facilities: {fd.get('total_facilities', 'N/A')}\n\n"
411
 
412
  if 'type_distribution' in fd:
413
  response += "### Facility Type Distribution\n\n"
 
439
 
440
  if 'zone_summary' in bc and bc['zone_summary']:
441
  response += "### Bed Capacity by Zone\n\n"
442
+ response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
443
+ response += "|------|---------------|-----------------|-----------------|----------------|\n"
444
 
445
  for zone_data in bc['zone_summary']:
446
+ zone = zone_data.get(bc['columns_used']['zone'], 'N/A') if bc['columns_used'].get('zone') else 'N/A'
447
+ current = zone_data.get(bc['columns_used']['beds_current'], 'N/A')
448
+ prev = zone_data.get(bc['columns_used']['beds_prev'], 'N/A')
449
  change = zone_data.get('bed_change', 'N/A')
450
  pct = zone_data.get('percent_change', 'N/A')
451
  response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
 
455
  'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
456
  abs_dec = bc['max_absolute_decrease']
457
  pct_dec = bc['max_percentage_decrease']
458
+ response += f"**Zone with largest absolute decrease**: {abs_dec.get(bc['columns_used']['zone'], 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
459
+ response += f"**Zone with largest percentage decrease**: {pct_dec.get(bc['columns_used']['zone'], 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
460
 
461
  if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
462
  response += "### Facilities with Largest Bed Declines\n\n"
 
465
 
466
  for facility in bc['facilities_with_largest_declines']:
467
  name = facility.get('facility_name', 'N/A')
468
+ zone = facility.get(bc['columns_used']['zone'], 'N/A') if bc['columns_used'].get('zone') else 'N/A'
469
  teaching = facility.get('teaching_status', 'N/A')
470
  change = facility.get('bed_change', 'N/A')
471
  response += f"| {name} | {zone} | {teaching} | {change} |\n"
 
518
  response += "## Provenance\n\n"
519
  response += "This analysis is based on:\n"
520
  response += "- Scenario description provided by the user\n"
521
+ response += "- Uploaded data files\n"
522
  response += "- Calculations performed on the provided data\n"
523
 
524
  return response
525
 
526
  def handle_healthcare_scenario(scenario_text, data_registry, history):
527
+ """Handle healthcare scenarios dynamically."""
528
  try:
 
529
  results = {}
530
 
531
+ # Dynamically identify relevant files
532
+ facility_files = data_registry.get_data_by_type('facility_data')
533
+ bed_files = data_registry.get_data_by_type('bed_data')
534
+
535
+ # Use the first file of each type (can be enhanced to use multiple)
536
  facilities_df = None
537
+ if facility_files:
538
+ facilities_df = data_registry.get(facility_files[0])
539
 
540
+ beds_df = None
541
+ if bed_files:
542
+ beds_df = data_registry.get(bed_files[0])
 
 
 
 
 
543
 
544
  # Log what we found
545
  log_event("data_files_found", None, {
546
  "facilities": facilities_df is not None,
547
  "beds": beds_df is not None,
548
+ "facility_files": facility_files,
549
+ "bed_files": bed_files
550
  })
551
 
552
+ # Perform analyses based on available data
553
  if facilities_df is not None:
554
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
555
 
 
556
  if beds_df is not None:
557
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
558
 
559
+ # Long-term care assessment if we have both data types
560
  if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
561
+ zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
562
+ if zone_col:
563
+ worst_zone = results['bed_capacity']['max_percentage_decrease'].get(zone_col, '')
564
+ if worst_zone and facilities_df is not None:
565
+ results['long_term_care'] = assess_long_term_capacity(
566
+ facilities_df,
567
+ beds_df,
568
+ worst_zone
569
+ )
570
 
571
+ # Generate recommendations
572
  recommendations = generate_operational_recommendations(results)
573
 
574
+ # Generate AI integration discussion
575
  ai_integration = generate_ai_integration_discussion(results)
576
 
577
+ # Format response
578
  response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
579
 
580
  return response
 
659
  resp = client.chat(
660
  model="command-r7b-12-2024",
661
  message=prompt,
662
+ temperature=MODEL_SETTINGS.get("temperature", 0.3),
663
  max_tokens=MAX_NEW_TOKENS,
664
  )
665
  if hasattr(resp, "text") and resp.text: return resp.text.strip()
 
684
  with torch.no_grad():
685
  out = model.generate(
686
  input_ids=input_ids, max_new_tokens=max_new_tokens,
687
+ do_sample=True, temperature=MODEL_SETTINGS.get("temperature", 0.3),
688
+ top_p=MODEL_SETTINGS.get("top_p", 0.9),
689
+ repetition_penalty=MODEL_SETTINGS.get("repetition_penalty", 1.15),
690
  pad_token_id=tokenizer.eos_token_id,
691
  eos_token_id=tokenizer.eos_token_id,
692
  )
 
715
  if uploaded_files_paths:
716
  process_healthcare_data(uploaded_files_paths, data_registry)
717
 
718
+ # Also extract text for RAG
719
+ ing = extract_text_from_files(uploaded_files_paths)
720
+ if ing.get("chunks"):
721
+ session_rag.add_docs(ing["chunks"])
722
+ if ing.get("artifacts"):
723
+ session_rag.register_artifacts(ing["artifacts"])
724
+
725
  # Update session RAG with CSV columns
726
  for file_name in data_registry.names():
727
  if file_name.endswith('.csv'):
 
787
  elem_classes="hero-box"
788
  )
789
  hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
790
+ gr.Markdown('<div class="hint">Upload healthcare data files (CSV, Excel, JSON, PDF, etc.) and describe your scenario for comprehensive analysis.</div>')
791
 
792
  # --- MAIN APP (hidden until first message) ---
793
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
 
795
  with gr.Row():
796
  uploads = gr.Files(
797
  label="Upload healthcare data files",
798
+ file_types=HEALTHCARE_SETTINGS["supported_file_types"],
799
+ file_count="multiple", height=68
800
  )
801
  with gr.Row(elem_id="chat-input-row"):
802
  msg = gr.Textbox(