Rajan Sharma committed on
Commit
fa74f5a
·
verified ·
1 Parent(s): fa2487d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -82
app.py CHANGED
@@ -9,7 +9,7 @@ import gradio as gr
9
  import torch
10
  import regex as re2
11
 
12
- # Import necessary modules (assuming they exist in your environment)
13
  from settings import SNAPSHOT_PATH, PERSIST_CONTENT
14
  from audit_log import log_event, hash_summary
15
  from privacy import redact_text, safety_filter, refusal_reply
@@ -149,7 +149,6 @@ class SessionRAG:
149
  return self.csv_columns
150
 
151
  def retrieve(self, query, k=5):
152
- # Simple retrieval - return top k documents
153
  return self.docs[:k] if self.docs else []
154
 
155
  def clear(self):
@@ -203,7 +202,7 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
203
  df = pd.read_csv(file_path)
204
 
205
  # Standardize column names
206
- df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]
207
 
208
  # Handle healthcare-specific data structures
209
  if 'facility_name' in df.columns:
@@ -350,7 +349,7 @@ def generate_operational_recommendations(analysis_results):
350
  # Recommendation 1: Address bed capacity issues
351
  if 'bed_capacity' in analysis_results:
352
  bed_data = analysis_results['bed_capacity']
353
- if 'max_percentage_decrease' in bed_data:
354
  zone = bed_data['max_percentage_decrease'].get('zone', '')
355
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
356
  recommendations.append({
@@ -396,91 +395,107 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
396
  # Data Preparation Section
397
  if 'facility_distribution' in results:
398
  fd = results['facility_distribution']
399
- response += "## 1. Data Preparation\n\n"
400
- response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n"
401
-
402
- if 'type_distribution' in fd:
403
- response += "### Facility Type Distribution\n\n"
404
- for ftype, count in fd['type_distribution'].items():
405
- response += f"- {ftype}: {count}\n"
406
- response += "\n"
407
-
408
- if 'city_breakdown' in fd:
409
- response += "### Top Cities by Facility Count\n\n"
410
- response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
411
- response += "|------|-----------|-------------------|------------|-------|\n"
412
 
413
- for city, breakdown in fd['city_breakdown'].items():
414
- hospitals = breakdown.get('Hospitals', 0)
415
- nursing = breakdown.get('Nursing and residential care facilities', 0)
416
- ambulatory = breakdown.get('Ambulatory health care services', 0)
417
- total = hospitals + nursing + ambulatory
418
- response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
419
- response += "\n"
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  # Bed Capacity Analysis Section
422
  if 'bed_capacity' in results:
423
  bc = results['bed_capacity']
424
- response += "## 2. Bed Capacity Analysis\n\n"
425
-
426
- if 'zone_summary' in bc:
427
- response += "### Bed Capacity by Zone\n\n"
428
- response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n"
429
- response += "|------|---------------|---------------|-----------------|----------------|\n"
430
 
431
- for zone_data in bc['zone_summary']:
432
- zone = zone_data.get('zone', 'N/A')
433
- current = zone_data.get('beds_current', 'N/A')
434
- prev = zone_data.get('beds_prev', 'N/A')
435
- change = zone_data.get('bed_change', 'N/A')
436
- pct = zone_data.get('percent_change', 'N/A')
437
- response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
438
- response += "\n"
439
-
440
- if 'max_absolute_decrease' in bc and 'max_percentage_decrease' in bc:
441
- abs_dec = bc['max_absolute_decrease']
442
- pct_dec = bc['max_percentage_decrease']
443
- response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
444
- response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
445
-
446
- if 'facilities_with_largest_declines' in bc:
447
- response += "### Facilities with Largest Bed Declines\n\n"
448
- response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
449
- response += "|----------|------|----------------|-----------|\n"
 
450
 
451
- for facility in bc['facilities_with_largest_declines']:
452
- name = facility.get('facility_name', 'N/A')
453
- zone = facility.get('zone', 'N/A')
454
- teaching = facility.get('teaching_status', 'N/A')
455
- change = facility.get('bed_change', 'N/A')
456
- response += f"| {name} | {zone} | {teaching} | {change} |\n"
457
- response += "\n"
 
 
 
 
 
458
 
459
  # Long-term Care Section
460
  if 'long_term_care' in results:
461
  ltc = results['long_term_care']
462
- response += "## 3. Long-Term Care Capacity Assessment\n\n"
463
-
464
- zone = ltc.get('zone', 'N/A')
465
- city = ltc.get('major_city', 'N/A')
466
- ratio = ltc.get('nursing_to_hospital_ratio', 0)
467
- assessment = ltc.get('capacity_assessment', 'N/A')
468
-
469
- response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
470
- response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
471
-
472
- if 'facility_counts' in ltc:
473
- response += "### Facility Counts\n\n"
474
- for ftype, count in ltc['facility_counts'].items():
475
- response += f"- {ftype}: {count}\n"
476
- response += "\n"
 
 
 
 
477
 
478
  # Recommendations Section
479
  response += "## 4. Operational Recommendations\n\n"
480
- for rec in recommendations:
481
- response += f"### {rec['title']}\n\n"
482
- response += f"{rec['description']}\n\n"
483
- response += f"*Data source: {rec['data_source']}*\n\n"
 
 
 
484
 
485
  # AI Integration Section
486
  response += "## 5. Future Integration for Augmented AI\n\n"
@@ -496,7 +511,7 @@ def format_healthcare_analysis_response(scenario_text, results, recommendations,
496
  response += "## Provenance\n\n"
497
  response += "This analysis is based on:\n"
498
  response += "- Scenario description provided by the user\n"
499
- response += "- Uploaded data files\n"
500
  response += "- Calculations performed on the provided data\n"
501
 
502
  return response
@@ -511,12 +526,21 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
511
  facilities_df = None
512
  beds_df = None
513
 
 
514
  for file_name in data_registry.names():
515
  df = data_registry.get(file_name)
516
- if 'facility' in file_name.lower() or 'health' in file_name.lower():
517
- facilities_df = df
518
- elif 'bed' in file_name.lower():
519
- beds_df = df
 
 
 
 
 
 
 
 
520
 
521
  if facilities_df is not None:
522
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
@@ -526,7 +550,7 @@ def handle_healthcare_scenario(scenario_text, data_registry, history):
526
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
527
 
528
  # Task 3: Long-term care capacity assessment
529
- if 'zone' in beds_df.columns and 'max_percentage_decrease' in results['bed_capacity']:
530
  worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '')
531
  if worst_zone and facilities_df is not None:
532
  results['long_term_care'] = assess_long_term_capacity(
@@ -694,7 +718,6 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
694
  return history + [(user_msg, response)], False
695
 
696
  # For non-healthcare scenarios, use the original logic
697
- # ... (Original non-healthcare scenario handling would go here)
698
  # For now, provide a fallback response
699
  response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
700
  return history + [(user_msg, response)], awaiting_answers
 
9
  import torch
10
  import regex as re2
11
 
12
+ # Import necessary modules
13
  from settings import SNAPSHOT_PATH, PERSIST_CONTENT
14
  from audit_log import log_event, hash_summary
15
  from privacy import redact_text, safety_filter, refusal_reply
 
149
  return self.csv_columns
150
 
151
  def retrieve(self, query, k=5):
 
152
  return self.docs[:k] if self.docs else []
153
 
154
  def clear(self):
 
202
  df = pd.read_csv(file_path)
203
 
204
  # Standardize column names
205
+ df.columns = [col.strip().lower().replace(' ', '_').replace('-', '_') for col in df.columns]
206
 
207
  # Handle healthcare-specific data structures
208
  if 'facility_name' in df.columns:
 
349
  # Recommendation 1: Address bed capacity issues
350
  if 'bed_capacity' in analysis_results:
351
  bed_data = analysis_results['bed_capacity']
352
+ if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
353
  zone = bed_data['max_percentage_decrease'].get('zone', '')
354
  decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
355
  recommendations.append({
 
395
  # Data Preparation Section
396
  if 'facility_distribution' in results:
397
  fd = results['facility_distribution']
398
+ if 'error' in fd:
399
+ response += "## 1. Data Preparation\n\n"
400
+ response += f"Error in facility distribution analysis: {fd['error']}\n\n"
401
+ else:
402
+ response += "## 1. Data Preparation\n\n"
403
+ response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n"
 
 
 
 
 
 
 
404
 
405
+ if 'type_distribution' in fd:
406
+ response += "### Facility Type Distribution\n\n"
407
+ for ftype, count in fd['type_distribution'].items():
408
+ response += f"- {ftype}: {count}\n"
409
+ response += "\n"
410
+
411
+ if 'city_breakdown' in fd:
412
+ response += "### Top Cities by Facility Count\n\n"
413
+ response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
414
+ response += "|------|-----------|-------------------|------------|-------|\n"
415
+
416
+ for city, breakdown in fd['city_breakdown'].items():
417
+ hospitals = breakdown.get('Hospitals', 0)
418
+ nursing = breakdown.get('Nursing and residential care facilities', 0)
419
+ ambulatory = breakdown.get('Ambulatory health care services', 0)
420
+ total = hospitals + nursing + ambulatory
421
+ response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
422
+ response += "\n"
423
 
424
  # Bed Capacity Analysis Section
425
  if 'bed_capacity' in results:
426
  bc = results['bed_capacity']
427
+ if 'error' in bc:
428
+ response += "## 2. Bed Capacity Analysis\n\n"
429
+ response += f"Error in bed capacity analysis: {bc['error']}\n\n"
430
+ else:
431
+ response += "## 2. Bed Capacity Analysis\n\n"
 
432
 
433
+ if 'zone_summary' in bc and bc['zone_summary']:
434
+ response += "### Bed Capacity by Zone\n\n"
435
+ response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n"
436
+ response += "|------|---------------|---------------|-----------------|----------------|\n"
437
+
438
+ for zone_data in bc['zone_summary']:
439
+ zone = zone_data.get('zone', 'N/A')
440
+ current = zone_data.get('beds_current', 'N/A')
441
+ prev = zone_data.get('beds_prev', 'N/A')
442
+ change = zone_data.get('bed_change', 'N/A')
443
+ pct = zone_data.get('percent_change', 'N/A')
444
+ response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
445
+ response += "\n"
446
+
447
+ if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
448
+ 'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
449
+ abs_dec = bc['max_absolute_decrease']
450
+ pct_dec = bc['max_percentage_decrease']
451
+ response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
452
+ response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
453
 
454
+ if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
455
+ response += "### Facilities with Largest Bed Declines\n\n"
456
+ response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
457
+ response += "|----------|------|----------------|-----------|\n"
458
+
459
+ for facility in bc['facilities_with_largest_declines']:
460
+ name = facility.get('facility_name', 'N/A')
461
+ zone = facility.get('zone', 'N/A')
462
+ teaching = facility.get('teaching_status', 'N/A')
463
+ change = facility.get('bed_change', 'N/A')
464
+ response += f"| {name} | {zone} | {teaching} | {change} |\n"
465
+ response += "\n"
466
 
467
  # Long-term Care Section
468
  if 'long_term_care' in results:
469
  ltc = results['long_term_care']
470
+ if 'error' in ltc:
471
+ response += "## 3. Long-Term Care Capacity Assessment\n\n"
472
+ response += f"Error in long-term care assessment: {ltc['error']}\n\n"
473
+ else:
474
+ response += "## 3. Long-Term Care Capacity Assessment\n\n"
475
+
476
+ zone = ltc.get('zone', 'N/A')
477
+ city = ltc.get('major_city', 'N/A')
478
+ ratio = ltc.get('nursing_to_hospital_ratio', 0)
479
+ assessment = ltc.get('capacity_assessment', 'N/A')
480
+
481
+ response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
482
+ response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
483
+
484
+ if 'facility_counts' in ltc:
485
+ response += "### Facility Counts\n\n"
486
+ for ftype, count in ltc['facility_counts'].items():
487
+ response += f"- {ftype}: {count}\n"
488
+ response += "\n"
489
 
490
  # Recommendations Section
491
  response += "## 4. Operational Recommendations\n\n"
492
+ if recommendations:
493
+ for rec in recommendations:
494
+ response += f"### {rec['title']}\n\n"
495
+ response += f"{rec['description']}\n\n"
496
+ response += f"*Data source: {rec['data_source']}*\n\n"
497
+ else:
498
+ response += "No specific recommendations could be generated due to data limitations.\n\n"
499
 
500
  # AI Integration Section
501
  response += "## 5. Future Integration for Augmented AI\n\n"
 
511
  response += "## Provenance\n\n"
512
  response += "This analysis is based on:\n"
513
  response += "- Scenario description provided by the user\n"
514
+ response += "- Uploaded data files: all_health_facilities.csv and clean_beds_data.csv\n"
515
  response += "- Calculations performed on the provided data\n"
516
 
517
  return response
 
526
  facilities_df = None
527
  beds_df = None
528
 
529
+ # Find the relevant data files
530
  for file_name in data_registry.names():
531
  df = data_registry.get(file_name)
532
+ if df is not None:
533
+ if 'facility' in file_name.lower() or 'health' in file_name.lower():
534
+ facilities_df = df
535
+ elif 'bed' in file_name.lower():
536
+ beds_df = df
537
+
538
+ # Log what we found
539
+ log_event("data_files_found", None, {
540
+ "facilities": facilities_df is not None,
541
+ "beds": beds_df is not None,
542
+ "files": data_registry.names()
543
+ })
544
 
545
  if facilities_df is not None:
546
  results['facility_distribution'] = analyze_facility_distribution(facilities_df)
 
550
  results['bed_capacity'] = analyze_bed_capacity(beds_df)
551
 
552
  # Task 3: Long-term care capacity assessment
553
+ if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
554
  worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '')
555
  if worst_zone and facilities_df is not None:
556
  results['long_term_care'] = assess_long_term_capacity(
 
718
  return history + [(user_msg, response)], False
719
 
720
  # For non-healthcare scenarios, use the original logic
 
721
  # For now, provide a fallback response
722
  response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
723
  return history + [(user_msg, response)], awaiting_answers