Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

Rajan Sharma committed on Sep 22

Commit

fa74f5a

verified ·

1 Parent(s): fa2487d

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -82

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import gradio as gr
 import torch
 import regex as re2
-# Import necessary modules (assuming they exist in your environment)
 from settings import SNAPSHOT_PATH, PERSIST_CONTENT
 from audit_log import log_event, hash_summary
 from privacy import redact_text, safety_filter, refusal_reply
@@ -149,7 +149,6 @@ class SessionRAG:
         return self.csv_columns
     def retrieve(self, query, k=5):
-        # Simple retrieval - return top k documents
         return self.docs[:k] if self.docs else []
     def clear(self):
@@ -203,7 +202,7 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
                 df = pd.read_csv(file_path)
                 # Standardize column names
-                df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]
                 # Handle healthcare-specific data structures
                 if 'facility_name' in df.columns:
@@ -350,7 +349,7 @@ def generate_operational_recommendations(analysis_results):
     # Recommendation 1: Address bed capacity issues
     if 'bed_capacity' in analysis_results:
         bed_data = analysis_results['bed_capacity']
-        if 'max_percentage_decrease' in bed_data:
             zone = bed_data['max_percentage_decrease'].get('zone', '')
             decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
             recommendations.append({
@@ -396,91 +395,107 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
     # Data Preparation Section
     if 'facility_distribution' in results:
         fd = results['facility_distribution']
-        response += "## 1. Data Preparation\n\n"
-        response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n"
-        if 'type_distribution' in fd:
-            response += "### Facility Type Distribution\n\n"
-            for ftype, count in fd['type_distribution'].items():
-                response += f"- {ftype}: {count}\n"
-            response += "\n"
-        if 'city_breakdown' in fd:
-            response += "### Top Cities by Facility Count\n\n"
-            response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
-            response += "|------|-----------|-------------------|------------|-------|\n"
-            for city, breakdown in fd['city_breakdown'].items():
-                hospitals = breakdown.get('Hospitals', 0)
-                nursing = breakdown.get('Nursing and residential care facilities', 0)
-                ambulatory = breakdown.get('Ambulatory health care services', 0)
-                total = hospitals + nursing + ambulatory
-                response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
-            response += "\n"
     # Bed Capacity Analysis Section
     if 'bed_capacity' in results:
         bc = results['bed_capacity']
-        response += "## 2. Bed Capacity Analysis\n\n"
-        if 'zone_summary' in bc:
-            response += "### Bed Capacity by Zone\n\n"
-            response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n"
-            response += "|------|---------------|---------------|-----------------|----------------|\n"
-            for zone_data in bc['zone_summary']:
-                zone = zone_data.get('zone', 'N/A')
-                current = zone_data.get('beds_current', 'N/A')
-                prev = zone_data.get('beds_prev', 'N/A')
-                change = zone_data.get('bed_change', 'N/A')
-                pct = zone_data.get('percent_change', 'N/A')
-                response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
-            response += "\n"
-        if 'max_absolute_decrease' in bc and 'max_percentage_decrease' in bc:
-            abs_dec = bc['max_absolute_decrease']
-            pct_dec = bc['max_percentage_decrease']
-            response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
-            response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
-        if 'facilities_with_largest_declines' in bc:
-            response += "### Facilities with Largest Bed Declines\n\n"
-            response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
-            response += "|----------|------|----------------|-----------|\n"
-            for facility in bc['facilities_with_largest_declines']:
-                name = facility.get('facility_name', 'N/A')
-                zone = facility.get('zone', 'N/A')
-                teaching = facility.get('teaching_status', 'N/A')
-                change = facility.get('bed_change', 'N/A')
-                response += f"| {name} | {zone} | {teaching} | {change} |\n"
-            response += "\n"
     # Long-term Care Section
     if 'long_term_care' in results:
         ltc = results['long_term_care']
-        response += "## 3. Long-Term Care Capacity Assessment\n\n"
-        zone = ltc.get('zone', 'N/A')
-        city = ltc.get('major_city', 'N/A')
-        ratio = ltc.get('nursing_to_hospital_ratio', 0)
-        assessment = ltc.get('capacity_assessment', 'N/A')
-        response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
-        response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
-        if 'facility_counts' in ltc:
-            response += "### Facility Counts\n\n"
-            for ftype, count in ltc['facility_counts'].items():
-                response += f"- {ftype}: {count}\n"
-            response += "\n"
     # Recommendations Section
     response += "## 4. Operational Recommendations\n\n"
-    for rec in recommendations:
-        response += f"### {rec['title']}\n\n"
-        response += f"{rec['description']}\n\n"
-        response += f"*Data source: {rec['data_source']}*\n\n"
     # AI Integration Section
     response += "## 5. Future Integration for Augmented AI\n\n"
@@ -496,7 +511,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
     response += "## Provenance\n\n"
     response += "This analysis is based on:\n"
     response += "- Scenario description provided by the user\n"
-    response += "- Uploaded data files\n"
     response += "- Calculations performed on the provided data\n"
     return response
@@ -511,12 +526,21 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
         facilities_df = None
         beds_df = None
         for file_name in data_registry.names():
             df = data_registry.get(file_name)
-            if 'facility' in file_name.lower() or 'health' in file_name.lower():
-                facilities_df = df
-            elif 'bed' in file_name.lower():
-                beds_df = df
         if facilities_df is not None:
             results['facility_distribution'] = analyze_facility_distribution(facilities_df)
@@ -526,7 +550,7 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
             results['bed_capacity'] = analyze_bed_capacity(beds_df)
             # Task 3: Long-term care capacity assessment
-            if 'zone' in beds_df.columns and 'max_percentage_decrease' in results['bed_capacity']:
                 worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '')
                 if worst_zone and facilities_df is not None:
                     results['long_term_care'] = assess_long_term_capacity(
@@ -694,7 +718,6 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
             return history + [(user_msg, response)], False
         # For non-healthcare scenarios, use the original logic
-        # ... (Original non-healthcare scenario handling would go here)
         # For now, provide a fallback response
         response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
         return history + [(user_msg, response)], awaiting_answers

 import torch
 import regex as re2
+# Import necessary modules
 from settings import SNAPSHOT_PATH, PERSIST_CONTENT
 from audit_log import log_event, hash_summary
 from privacy import redact_text, safety_filter, refusal_reply
         return self.csv_columns
     def retrieve(self, query, k=5):
         return self.docs[:k] if self.docs else []
     def clear(self):
                 df = pd.read_csv(file_path)
                 # Standardize column names
+                df.columns = [col.strip().lower().replace(' ', '_').replace('-', '_') for col in df.columns]
                 # Handle healthcare-specific data structures
                 if 'facility_name' in df.columns:
     # Recommendation 1: Address bed capacity issues
     if 'bed_capacity' in analysis_results:
         bed_data = analysis_results['bed_capacity']
+        if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
             zone = bed_data['max_percentage_decrease'].get('zone', '')
             decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
             recommendations.append({
     # Data Preparation Section
     if 'facility_distribution' in results:
         fd = results['facility_distribution']
+        if 'error' in fd:
+            response += "## 1. Data Preparation\n\n"
+            response += f"Error in facility distribution analysis: {fd['error']}\n\n"
+        else:
+            response += "## 1. Data Preparation\n\n"
+            response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n"
+            if 'type_distribution' in fd:
+                response += "### Facility Type Distribution\n\n"
+                for ftype, count in fd['type_distribution'].items():
+                    response += f"- {ftype}: {count}\n"
+                response += "\n"
+            if 'city_breakdown' in fd:
+                response += "### Top Cities by Facility Count\n\n"
+                response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
+                response += "|------|-----------|-------------------|------------|-------|\n"
+                for city, breakdown in fd['city_breakdown'].items():
+                    hospitals = breakdown.get('Hospitals', 0)
+                    nursing = breakdown.get('Nursing and residential care facilities', 0)
+                    ambulatory = breakdown.get('Ambulatory health care services', 0)
+                    total = hospitals + nursing + ambulatory
+                    response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
+                response += "\n"
     # Bed Capacity Analysis Section
     if 'bed_capacity' in results:
         bc = results['bed_capacity']
+        if 'error' in bc:
+            response += "## 2. Bed Capacity Analysis\n\n"
+            response += f"Error in bed capacity analysis: {bc['error']}\n\n"
+        else:
+            response += "## 2. Bed Capacity Analysis\n\n"
+            if 'zone_summary' in bc and bc['zone_summary']:
+                response += "### Bed Capacity by Zone\n\n"
+                response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n"
+                response += "|------|---------------|---------------|-----------------|----------------|\n"
+                for zone_data in bc['zone_summary']:
+                    zone = zone_data.get('zone', 'N/A')
+                    current = zone_data.get('beds_current', 'N/A')
+                    prev = zone_data.get('beds_prev', 'N/A')
+                    change = zone_data.get('bed_change', 'N/A')
+                    pct = zone_data.get('percent_change', 'N/A')
+                    response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
+                response += "\n"
+            if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
+               'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
+                abs_dec = bc['max_absolute_decrease']
+                pct_dec = bc['max_percentage_decrease']
+                response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
+                response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
+            if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
+                response += "### Facilities with Largest Bed Declines\n\n"
+                response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
+                response += "|----------|------|----------------|-----------|\n"
+                for facility in bc['facilities_with_largest_declines']:
+                    name = facility.get('facility_name', 'N/A')
+                    zone = facility.get('zone', 'N/A')
+                    teaching = facility.get('teaching_status', 'N/A')
+                    change = facility.get('bed_change', 'N/A')
+                    response += f"| {name} | {zone} | {teaching} | {change} |\n"
+                response += "\n"
     # Long-term Care Section
     if 'long_term_care' in results:
         ltc = results['long_term_care']
+        if 'error' in ltc:
+            response += "## 3. Long-Term Care Capacity Assessment\n\n"
+            response += f"Error in long-term care assessment: {ltc['error']}\n\n"
+        else:
+            response += "## 3. Long-Term Care Capacity Assessment\n\n"
+            zone = ltc.get('zone', 'N/A')
+            city = ltc.get('major_city', 'N/A')
+            ratio = ltc.get('nursing_to_hospital_ratio', 0)
+            assessment = ltc.get('capacity_assessment', 'N/A')
+            response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
+            response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
+            if 'facility_counts' in ltc:
+                response += "### Facility Counts\n\n"
+                for ftype, count in ltc['facility_counts'].items():
+                    response += f"- {ftype}: {count}\n"
+                response += "\n"
     # Recommendations Section
     response += "## 4. Operational Recommendations\n\n"
+    if recommendations:
+        for rec in recommendations:
+            response += f"### {rec['title']}\n\n"
+            response += f"{rec['description']}\n\n"
+            response += f"*Data source: {rec['data_source']}*\n\n"
+    else:
+        response += "No specific recommendations could be generated due to data limitations.\n\n"
     # AI Integration Section
     response += "## 5. Future Integration for Augmented AI\n\n"
     response += "## Provenance\n\n"
     response += "This analysis is based on:\n"
     response += "- Scenario description provided by the user\n"
+    response += "- Uploaded data files: all_health_facilities.csv and clean_beds_data.csv\n"
     response += "- Calculations performed on the provided data\n"
     return response
         facilities_df = None
         beds_df = None
+        # Find the relevant data files
         for file_name in data_registry.names():
             df = data_registry.get(file_name)
+            if df is not None:
+                if 'facility' in file_name.lower() or 'health' in file_name.lower():
+                    facilities_df = df
+                elif 'bed' in file_name.lower():
+                    beds_df = df
+        # Log what we found
+        log_event("data_files_found", None, {
+            "facilities": facilities_df is not None,
+            "beds": beds_df is not None,
+            "files": data_registry.names()
+        })
         if facilities_df is not None:
             results['facility_distribution'] = analyze_facility_distribution(facilities_df)
             results['bed_capacity'] = analyze_bed_capacity(beds_df)
             # Task 3: Long-term care capacity assessment
+            if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
                 worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '')
                 if worst_zone and facilities_df is not None:
                     results['long_term_care'] = assess_long_term_capacity(
             return history + [(user_msg, response)], False
         # For non-healthcare scenarios, use the original logic
         # For now, provide a fallback response
         response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
         return history + [(user_msg, response)], awaiting_answers