File size: 36,606 Bytes
f61e31c
b9b3e60
40db972
f61e31c
 
 
40db972
 
 
f61e31c
8f6e031
f61e31c
d51b275
 
f61e31c
d51b275
651c3c3
b391d29
b9b3e60
 
 
 
c0550c0
b9b3e60
 
 
 
b391d29
b9b3e60
 
 
d312515
 
 
b9b3e60
ae93cdb
 
 
 
9cdcc42
b391d29
40db972
2bdb6e6
40db972
 
 
 
 
795ccd0
40db972
f61e31c
 
 
 
 
 
 
795ccd0
f61e31c
 
 
 
 
1ca7039
ae93cdb
f61e31c
2bdb6e6
40db972
 
10cd369
40db972
f6cdc91
156e8ee
651c3c3
f6cdc91
b391d29
651c3c3
 
f6cdc91
 
 
 
 
 
 
156e8ee
 
f61e31c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40db972
ae93cdb
 
 
 
2bdb6e6
40db972
f61e31c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2bdb6e6
40db972
b9b3e60
 
 
ae93cdb
40db972
651c3c3
b391d29
40db972
2bdb6e6
b391d29
 
 
e2b82fa
 
 
 
 
 
 
b9b3e60
651c3c3
 
b9b3e60
 
909d570
88ff30e
ae93cdb
40db972
88ff30e
651c3c3
 
 
 
 
 
88ff30e
2bdb6e6
b3902ee
2bdb6e6
c0550c0
2bdb6e6
88ff30e
 
 
ae93cdb
 
 
40db972
909d570
 
e2b82fa
f051f2e
 
40db972
ae93cdb
 
 
40db972
ae93cdb
40db972
 
 
c0550c0
ae93cdb
 
 
 
40db972
 
2bdb6e6
40db972
f61e31c
979b614
40db972
909d570
d51b275
744c807
 
d51b275
f3f85b6
f051f2e
744c807
f61e31c
f3f85b6
744c807
f61e31c
 
 
979b614
f61e31c
 
 
d18bfbf
f61e31c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9b3e60
40db972
f5e795d
 
 
 
 
f3f85b6
40db972
f61e31c
f051f2e
40db972
f6cdc91
20ec625
 
651c3c3
c0550c0
 
 
35f7e6e
c0550c0
35f7e6e
 
c0550c0
 
 
 
651c3c3
86217d1
35f7e6e
 
 
 
40db972
 
f61e31c
88ff30e
f6cdc91
c0550c0
 
f61e31c
c0550c0
 
f61e31c
c0550c0
 
 
 
35f7e6e
f61e31c
c0550c0
651c3c3
c0550c0
9dffee0
c0550c0
 
f61e31c
c0550c0
7eb389a
35f7e6e
c0550c0
 
 
f6cdc91
35f7e6e
 
 
c0550c0
35f7e6e
 
744c807
c0550c0
744c807
 
76fca29
979b614
 
 
 
744c807
979b614
 
 
 
744c807
eb04439
979b614
f5e795d
 
909d570
 
979b614
b9b3e60
909d570
f5e795d
 
b9b3e60
 
88ff30e
909d570
c0550c0
eb04439
979b614
 
c0550c0
909d570
76fca29
 
 
c0550c0
88ff30e
c0550c0
 
979b614
909d570
c0550c0
 
 
 
979b614
909d570
c0550c0
 
 
eb04439
979b614
909d570
88ff30e
979b614
909d570
88ff30e
744c807
156e8ee
c0550c0
909d570
76fca29
 
 
7eb389a
156e8ee
909d570
744c807
11a5624
b1e4a49
c1e2deb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
# app.py - Enhanced Healthcare Scenario Analysis System
import os, re, json, traceback, pathlib
from functools import lru_cache
from typing import List, Dict, Any, Tuple, Optional
import pandas as pd
import numpy as np

import gradio as gr
import torch
import regex as re2

# Import necessary modules (assuming they exist in your environment)
from settings import SNAPSHOT_PATH, PERSIST_CONTENT
from audit_log import log_event, hash_summary
from privacy import redact_text, safety_filter, refusal_reply

# ---------- Writable caches (HF Spaces-safe) ----------
# All cache and temp locations are resolved under the current user's home
# directory.
HOME = pathlib.Path.home()
HF_HOME = str(HOME / ".cache" / "huggingface")
HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
ST_HOME = str(HOME / ".cache" / "sentence-transformers")
GRADIO_TMP = str(HOME / "app" / "gradio")
# Gradio's cache directory is the same path as its temp directory.
GRADIO_CACHE = GRADIO_TMP

# setdefault() only fills in variables that are not already present, so any
# value exported by the environment takes precedence over these defaults.
os.environ.setdefault("HF_HOME", HF_HOME)
os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS)
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

# Best-effort directory creation: any failure is deliberately ignored here.
# NOTE(review): the directories created are the defaults above, not whatever
# the environment variables actually resolved to -- confirm that is intended.
for p in [HF_HOME, HF_HUB_CACHE, HF_TRANSFORMERS, ST_HOME, GRADIO_TMP, GRADIO_CACHE]:
    try:
        os.makedirs(p, exist_ok=True)
    except Exception:
        pass

# Optional Cohere
try:
    import cohere
    _HAS_COHERE = True
except Exception:
    _HAS_COHERE = False

from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# ---------- Healthcare-specific constants ----------
# Lower-case terms that is_healthcare_scenario() looks for as plain substrings
# of the lower-cased scenario text (so "bed" also matches "embedded").
HEALTHCARE_KEYWORDS = [
    "hospital", "patient", "bed", "care", "health", "medical", "clinical",
    "facility", "nursing", "residential", "ambulatory", "healthcare", "occupancy",
    "capacity", "staff", "zone", "province", "alberta", "cihi", "odhf",
    "respiratory", "virus", "flu", "surge", "acute", "long-term", "ltc"
]

# Facility-type label -> lower-case phrases hinting at that type.
# is_healthcare_scenario() searches the phrases in the scenario text; the same
# label strings are used as lookup keys on facility_type counts by the
# long-term-care assessment and by the report formatter.
HEALTHCARE_FACILITY_TYPES = {
    "Hospitals": ["hospital", "medical center", "health centre"],
    "Nursing and residential care facilities": ["nursing", "residential", "care facility", "long-term care"],
    "Ambulatory health care services": ["ambulatory", "clinic", "surgery center", "outpatient"]
}

# ---------- Config ----------
# Model id passed to from_pretrained() in load_local_model(); override with
# the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
# Either variable name is accepted; HUGGINGFACE_HUB_TOKEN wins when both are
# set to a non-empty value.
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
# Hosted Cohere is enabled only when an API key is configured AND the optional
# `cohere` import above succeeded.
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
# Generation budget shared by the hosted and local paths. int() raises
# ValueError at import time if the variable is set to a non-integer string.
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))

# ---------- Generic System Prompt ----------
# System prompt that build_inputs() places first in every local-model
# conversation. The text is sent to the model verbatim (after .strip()), so
# editing it changes model behaviour.
SYSTEM_MASTER = """
SYSTEM ROLE
You are an AI analytical system that provides data-driven insights for any scenario.
Absolute rules:
- Use ONLY information provided in this conversation (scenario text + uploaded files + user answers).
- Never invent data. If something required is missing after clarifications, write the literal token: INSUFFICIENT_DATA.
- Provide clear analysis with calculations, evidence, and reasoning.
- Maintain privacy safeguards (aggregate data; suppress small cohorts <10).
- Adapt your analysis approach to the specific scenario and data provided.
Formatting rules for structured analysis:
- Start with the header: "Structured Analysis"
- Organize analysis into logical sections based on the scenario requirements
- End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
""".strip()

# ---------- Data Registry Class ----------
class DataRegistry:
    """In-memory registry of uploaded CSV files, keyed by base file name.

    ``data`` maps a file name to its loaded DataFrame; ``file_metadata`` maps
    the same name to a dict with the keys ``type``, ``columns``, ``shape`` and
    ``sample`` (the first three rows as records).
    """

    def __init__(self):
        self.data = {}
        self.file_metadata = {}

    def add_path(self, path):
        """Load the CSV at *path* and register it under its base name.

        Returns True when the file was loaded and registered, False when the
        file is not a CSV or loading failed (the error is printed, not raised).
        """
        try:
            file_name = os.path.basename(path)
            # Compare the extension case-insensitively so "DATA.CSV" is
            # accepted just like "data.csv".
            if not file_name.lower().endswith('.csv'):
                return False

            df = pd.read_csv(path)
            # Build the metadata before touching the registry so a failure
            # here cannot leave a DataFrame registered without metadata.
            metadata = {
                'type': 'csv',
                'columns': list(df.columns),
                'shape': df.shape,
                'sample': df.head(3).to_dict('records'),
            }
            self.data[file_name] = df
            self.file_metadata[file_name] = metadata
            return True
        except Exception as e:
            print(f"Error adding {path}: {e}")
            return False

    def names(self):
        """Return the registered file names in insertion order."""
        return list(self.data.keys())

    def get(self, name):
        """Return the DataFrame registered as *name*, or None if unknown."""
        return self.data.get(name)

    def summarize_for_prompt(self):
        """Return a plain-text summary (name, type, columns, shape) per file."""
        if not self.data:
            return "No data files registered."

        summary = []
        for name, meta in self.file_metadata.items():
            # str() each label: column labels are not guaranteed to be strings
            # and str.join() rejects anything else.
            columns = ', '.join(str(col) for col in meta['columns'])
            summary.append(f"File: {name}")
            summary.append(f"Type: {meta['type']}")
            summary.append(f"Columns: {columns}")
            summary.append(f"Shape: {meta['shape']}")
            summary.append("")

        return "\n".join(summary)

    def clear(self):
        """Forget every registered file and its metadata."""
        self.data.clear()
        self.file_metadata.clear()

# ---------- Session RAG Class (Simplified) ----------
class SessionRAG:
    """Minimal per-session store for document chunks, artifacts and CSV columns.

    Retrieval is positional: the query text is not used for ranking.
    """

    def __init__(self):
        self.docs, self.artifacts, self.csv_columns = [], [], []

    def add_docs(self, chunks):
        """Append every item of *chunks* to the stored documents."""
        self.docs += chunks

    def register_artifacts(self, artifacts):
        """Append every item of *artifacts* to the stored artifacts."""
        self.artifacts += artifacts

    def get_latest_csv_columns(self):
        """Return the list of CSV column names held by this session."""
        return self.csv_columns

    def retrieve(self, query, k=5):
        """Return the first *k* stored documents; *query* is ignored."""
        if not self.docs:
            return []
        return self.docs[:k]

    def clear(self):
        """Empty all three stores in place."""
        for store in (self.docs, self.artifacts, self.csv_columns):
            del store[:]

# ---------- Healthcare-specific functions ----------
def is_healthcare_scenario(text: str, uploaded_files_paths) -> bool:
    """Heuristically decide whether a scenario should use the healthcare path.

    Args:
        text: Scenario text; None is treated as an empty string.
        uploaded_files_paths: Iterable of uploaded file paths, or None when
            nothing was uploaded.

    Returns:
        True when the text carries at least one topical signal (a healthcare
        keyword, a facility-type phrase, or a known task phrase) AND at least
        one supporting signal (a healthcare-looking file path or a
        "background"/"situation"/"tasks" section word). All matching is
        lower-case substring matching.
    """
    t = (text or "").lower()
    # A missing upload list means "no files"; iterating None would raise.
    paths = [str(path).lower() for path in (uploaded_files_paths or []) if path]

    # Topical signals.
    has_healthcare_keywords = any(keyword in t for keyword in HEALTHCARE_KEYWORDS)
    has_facility_types = any(
        ftype in t
        for types in HEALTHCARE_FACILITY_TYPES.values()
        for ftype in types
    )
    task_phrases = (
        "bed capacity", "occupancy rates", "facility distribution",
        "long-term care", "health operations", "resource allocation",
    )
    has_healthcare_tasks = any(phrase in t for phrase in task_phrases)

    # Supporting signals. The whole path is searched, not just the file name.
    has_healthcare_files = any(
        "health" in path or "facility" in path or "bed" in path
        for path in paths
    )
    has_scenario_structure = any(
        section in t for section in ("background", "situation", "tasks")
    )

    topical = has_healthcare_keywords or has_facility_types or has_healthcare_tasks
    supporting = has_healthcare_files or has_scenario_structure
    return topical and supporting

def process_healthcare_data(uploaded_files_paths, data_registry):
    """Load uploaded CSV files, normalise them, and register the result.

    For every ``.csv`` path (extension compared case-insensitively):
    column names are stripped, lower-cased and spaces become underscores;
    ``facility_type`` is copied from ``odhf_facility_type`` when only the
    latter exists alongside ``facility_name``; ``bed_change`` and
    ``percent_change`` are derived when ``beds_current`` and ``beds_prev``
    are both present.

    Args:
        uploaded_files_paths: Iterable of file paths, or None for no files.
        data_registry: Object exposing ``add_path(path)`` plus the ``data``
            and ``file_metadata`` dicts keyed by base file name.

    Errors for a single file are printed and logged; processing continues
    with the next file.
    """
    for file_path in (uploaded_files_paths or []):
        try:
            base_name = os.path.basename(file_path)
            if not base_name.lower().endswith('.csv'):
                continue

            df = pd.read_csv(file_path)

            # Standardize column names
            df.columns = [str(col).strip().lower().replace(' ', '_') for col in df.columns]

            # Handle healthcare-specific data structures
            if ('facility_name' in df.columns
                    and 'facility_type' not in df.columns
                    and 'odhf_facility_type' in df.columns):
                df['facility_type'] = df['odhf_facility_type']

            if 'beds_current' in df.columns and 'beds_prev' in df.columns:
                df['bed_change'] = df['beds_current'] - df['beds_prev']
                df['percent_change'] = (df['bed_change'] / df['beds_prev']) * 100

            # add_path() re-reads the raw file, which would discard the
            # normalised columns and derived fields computed above. After a
            # successful registration, replace the stored frame (and the
            # metadata describing it) with the processed one.
            if data_registry.add_path(file_path):
                stored = getattr(data_registry, 'data', None)
                if isinstance(stored, dict):
                    stored[base_name] = df
                all_meta = getattr(data_registry, 'file_metadata', None)
                meta = all_meta.get(base_name) if isinstance(all_meta, dict) else None
                if isinstance(meta, dict):
                    meta['columns'] = list(df.columns)
                    meta['shape'] = df.shape
                    meta['sample'] = df.head(3).to_dict('records')

        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            log_event("data_processing_error", None, {
                "file": file_path,
                "error": str(e)
            })

def analyze_facility_distribution(facilities_df):
    """Summarise facilities by type and by the five cities with most facilities.

    When a ``province`` column exists, only rows whose province is "ab" or
    "alberta" (ignoring case and surrounding whitespace) are analysed;
    otherwise every row is used.

    Returns:
        A dict with ``total_facilities``, ``type_distribution``
        (facility_type -> count), ``top_cities`` (up to five names, most
        facilities first) and ``city_breakdown`` (city -> facility_type ->
        count). On any failure the error is logged and ``{"error": message}``
        is returned instead.
    """
    try:
        # Filter to Alberta if province column exists. Only column NAMES are
        # normalised upstream, so cell values such as "AB" or "Alberta" must be
        # normalised here before comparing.
        if 'province' in facilities_df.columns:
            province = facilities_df['province'].astype(str).str.strip().str.lower()
            ab_facilities = facilities_df[province.isin(['ab', 'alberta'])]
        else:
            ab_facilities = facilities_df

        # Facility type frequency
        type_counts = ab_facilities['facility_type'].value_counts().to_dict()

        # Top cities by facility count, each broken down by facility type
        top_cities = []
        city_breakdown = {}
        if 'city' in ab_facilities.columns:
            top_cities = ab_facilities['city'].value_counts().head(5).index.tolist()
            for city in top_cities:
                in_city = ab_facilities[ab_facilities['city'] == city]
                city_breakdown[city] = in_city['facility_type'].value_counts().to_dict()

        return {
            "total_facilities": len(ab_facilities),
            "type_distribution": type_counts,
            "top_cities": top_cities,
            "city_breakdown": city_breakdown
        }
    except Exception as e:
        log_event("facility_analysis_error", None, {"error": str(e)})
        return {"error": str(e)}

def analyze_bed_capacity(beds_df):
    """Summarise bed counts per zone and find the largest declines.

    When a ``province`` column exists, only rows whose province is "ab" or
    "alberta" (ignoring case and surrounding whitespace) are analysed.
    ``bed_change`` is derived from ``beds_current - beds_prev`` when the
    column is absent.

    Returns:
        A dict with ``zone_summary`` (one record per zone),
        ``max_absolute_decrease`` and ``max_percentage_decrease`` (the zone
        record with the lowest bed_change / percent_change, or ``{}``), and
        ``facilities_with_largest_declines`` (up to five rows with the lowest
        bed_change). Without a ``zone`` column, or with no rows left after
        filtering, all four values are empty. On any failure the error is
        logged and ``{"error": message}`` is returned instead.
    """
    try:
        # Filter to Alberta if province column exists (values are normalised
        # here because only column names are standardised upstream).
        if 'province' in beds_df.columns:
            province = beds_df['province'].astype(str).str.strip().str.lower()
            ab_beds = beds_df[province.isin(['ab', 'alberta'])]
        else:
            ab_beds = beds_df

        # The aggregation below needs bed_change; derive it when the input
        # only carries the two raw counts.
        if ('bed_change' not in ab_beds.columns
                and 'beds_current' in ab_beds.columns
                and 'beds_prev' in ab_beds.columns):
            ab_beds = ab_beds.assign(
                bed_change=ab_beds['beds_current'] - ab_beds['beds_prev']
            )

        zone_records = []
        max_abs_decrease = {}
        max_pct_decrease = {}
        decline_records = []

        if 'zone' in ab_beds.columns and not ab_beds.empty:
            # Calculate zone-level summaries
            zone_summary = ab_beds.groupby('zone').agg({
                'beds_current': 'sum',
                'beds_prev': 'sum',
                'bed_change': 'sum'
            }).reset_index()
            zone_summary['percent_change'] = (
                zone_summary['bed_change'] / zone_summary['beds_prev']
            ) * 100
            zone_records = zone_summary.to_dict('records')

            # idxmin() is only meaningful when at least one value is present.
            if zone_summary['bed_change'].notna().any():
                worst_abs = zone_summary['bed_change'].idxmin()
                max_abs_decrease = zone_summary.loc[worst_abs].to_dict()
            if zone_summary['percent_change'].notna().any():
                worst_pct = zone_summary['percent_change'].idxmin()
                max_pct_decrease = zone_summary.loc[worst_pct].to_dict()

            # Identify facilities with largest declines
            decline_records = ab_beds.sort_values('bed_change').head(5).to_dict('records')

        return {
            "zone_summary": zone_records,
            "max_absolute_decrease": max_abs_decrease,
            "max_percentage_decrease": max_pct_decrease,
            "facilities_with_largest_declines": decline_records
        }
    except Exception as e:
        log_event("bed_analysis_error", None, {"error": str(e)})
        return {"error": str(e)}

def assess_long_term_capacity(facilities_df, beds_df, zone_name):
    """Assess long-term care capacity in the busiest city of a zone.

    Facilities are narrowed to *zone_name* when a ``zone`` column exists
    (compared ignoring case and surrounding whitespace); otherwise to
    province "ab"/"alberta" when a ``province`` column exists; otherwise all
    rows are used. The city with the most facilities is then taken as the
    zone's major city.

    Args:
        facilities_df: Facility table with ``city`` and ``facility_type``.
        beds_df: Accepted for interface compatibility; not used.
        zone_name: Zone to assess.

    Returns:
        A dict with ``zone``, ``major_city``, ``facility_counts``,
        ``nursing_to_hospital_ratio`` and ``capacity_assessment``
        ("sufficient" when the ratio is at least 1.5, else "insufficient").
        ``{"error": message}`` is returned when no major city can be
        determined or when an exception occurs (which is also logged).
    """
    try:
        # Get facilities in the specified zone
        if 'zone' in facilities_df.columns:
            zones = facilities_df['zone'].astype(str).str.strip().str.lower()
            zone_facilities = facilities_df[zones == str(zone_name).strip().lower()]
        elif 'province' in facilities_df.columns:
            # If zone column not available, use province
            province = facilities_df['province'].astype(str).str.strip().str.lower()
            zone_facilities = facilities_df[province.isin(['ab', 'alberta'])]
        else:
            # Neither column exists: there is nothing to narrow by.
            zone_facilities = facilities_df

        # Find major city in zone
        if 'city' in zone_facilities.columns:
            city_counts = zone_facilities['city'].value_counts()
            major_city = city_counts.index[0] if len(city_counts) > 0 else None

            if major_city:
                city_facilities = zone_facilities[zone_facilities['city'] == major_city]
                facility_counts = city_facilities['facility_type'].value_counts().to_dict()

                # Ratio of nursing/residential facilities to hospitals; a city
                # with no hospitals gets ratio 0 and so rates "insufficient".
                hospitals = facility_counts.get('Hospitals', 0)
                nursing = facility_counts.get('Nursing and residential care facilities', 0)
                ratio = nursing / hospitals if hospitals > 0 else 0

                capacity_assessment = "sufficient" if ratio >= 1.5 else "insufficient"

                return {
                    "zone": zone_name,
                    "major_city": major_city,
                    "facility_counts": facility_counts,
                    "nursing_to_hospital_ratio": ratio,
                    "capacity_assessment": capacity_assessment
                }

        return {"error": "Could not determine major city or facility counts"}
    except Exception as e:
        log_event("ltc_assessment_error", None, {"error": str(e)})
        return {"error": str(e)}

def generate_operational_recommendations(analysis_results):
    """Turn analysis results into a list of operational recommendations.

    Args:
        analysis_results: Dict that may contain ``bed_capacity`` and
            ``long_term_care`` result dicts.

    Returns:
        A list of dicts with ``title``, ``description`` and ``data_source``,
        in this order when applicable:
        1. restore beds -- only when a worst zone is known and its
           percent_change is a negative number;
        2. expand long-term care -- only when capacity was assessed as
           "insufficient";
        3. surge planning -- whenever a bed analysis exists and did not fail.
    """
    recommendations = []

    bed_data = analysis_results.get('bed_capacity')
    # A failed analysis is returned as {"error": ...}; it carries no data to
    # base a bed-related recommendation on.
    has_bed_analysis = isinstance(bed_data, dict) and 'error' not in bed_data

    # Recommendation 1: Address bed capacity issues
    if has_bed_analysis:
        worst = bed_data.get('max_percentage_decrease') or {}
        zone = worst.get('zone', '')
        try:
            change = float(worst.get('percent_change'))
        except (TypeError, ValueError):
            change = None
        # Recommend only for a named zone with a real decline (NaN and
        # non-negative changes fail the comparison).
        if zone and change is not None and change < 0:
            recommendations.append({
                "title": f"Restore staffed beds in {zone} Zone",
                # abs(): the sentence already says "decrease", so the sign
                # must not be printed again.
                "description": f"Priority should be given to reopening closed units and hiring staff to address the {abs(change):.1f}% decrease in bed capacity.",
                "data_source": "Bed capacity analysis"
            })

    # Recommendation 2: Expand long-term care capacity
    ltc_data = analysis_results.get('long_term_care')
    if isinstance(ltc_data, dict) and ltc_data.get('capacity_assessment') == 'insufficient':
        city = ltc_data.get('major_city', '')
        recommendations.append({
            "title": f"Expand long-term care capacity in {city}",
            "description": "Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.",
            "data_source": "Long-term care capacity assessment"
        })

    # Recommendation 3: Implement surge plans
    if has_bed_analysis:
        recommendations.append({
            "title": "Implement surge capacity plans",
            "description": "Develop modular units and activate staffing pools to handle unpredictable spikes in demand.",
            "data_source": "Bed capacity trends"
        })

    return recommendations

def generate_ai_integration_discussion(analysis_results):
    """Return the fixed "future AI integration" discussion block.

    *analysis_results* is accepted for interface symmetry but not consulted;
    the returned dict always has the keys ``title``, ``description``,
    ``example`` and ``metrics``.
    """
    discussion = {}
    discussion["title"] = "Future Integration for Augmented Decision-Making"
    discussion["description"] = (
        "Combining facility information with operational data like emergency department wait times and disease surveillance can enable AI-driven resource optimization."
    )
    discussion["example"] = (
        "A model could ingest current ED wait times, hospital occupancy, and community case counts to forecast bed demand by zone and recommend redirecting ambulances to facilities with spare capacity."
    )
    discussion["metrics"] = [
        "Hospital occupancy rates",
        "ED wait times",
        "Disease surveillance data",
    ]
    return discussion

def _format_or_na(value, spec):
    """Return ``format(value, spec)``, or "N/A" when *value* cannot take *spec*."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return "N/A"


def _percent_or_na(value):
    """Return *value* as a one-decimal percentage, or "N/A" when not numeric."""
    formatted = _format_or_na(value, ".1f")
    return formatted if formatted == "N/A" else f"{formatted}%"


def _facility_section_lines(fd):
    """Build the "1. Data Preparation" section from facility distribution results."""
    lines = [
        "## 1. Data Preparation\n\n",
        f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n",
    ]

    if 'type_distribution' in fd:
        lines.append("### Facility Type Distribution\n\n")
        for ftype, count in fd['type_distribution'].items():
            lines.append(f"- {ftype}: {count}\n")
        lines.append("\n")

    if 'city_breakdown' in fd:
        lines.append("### Top Cities by Facility Count\n\n")
        lines.append("| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n")
        lines.append("|------|-----------|-------------------|------------|-------|\n")
        for city, breakdown in fd['city_breakdown'].items():
            hospitals = breakdown.get('Hospitals', 0)
            nursing = breakdown.get('Nursing and residential care facilities', 0)
            ambulatory = breakdown.get('Ambulatory health care services', 0)
            total = hospitals + nursing + ambulatory
            lines.append(f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n")
        lines.append("\n")

    return lines


def _bed_section_lines(bc):
    """Build the "2. Bed Capacity Analysis" section from bed capacity results."""
    lines = ["## 2. Bed Capacity Analysis\n\n"]

    if 'zone_summary' in bc:
        lines.append("### Bed Capacity by Zone\n\n")
        lines.append("| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n")
        lines.append("|------|---------------|---------------|-----------------|----------------|\n")
        for zone_data in bc['zone_summary']:
            zone = zone_data.get('zone', 'N/A')
            current = zone_data.get('beds_current', 'N/A')
            prev = zone_data.get('beds_prev', 'N/A')
            change = zone_data.get('bed_change', 'N/A')
            # A missing percentage must render as "N/A" rather than being
            # pushed through a float format spec.
            pct = _percent_or_na(zone_data.get('percent_change', 'N/A'))
            lines.append(f"| {zone} | {current} | {prev} | {change} | {pct} |\n")
        lines.append("\n")

    if 'max_absolute_decrease' in bc and 'max_percentage_decrease' in bc:
        abs_dec = bc['max_absolute_decrease']
        pct_dec = bc['max_percentage_decrease']
        worst_pct = _percent_or_na(pct_dec.get('percent_change', 'N/A'))
        lines.append(
            f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
        )
        lines.append(
            f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({worst_pct})\n\n"
        )

    if 'facilities_with_largest_declines' in bc:
        lines.append("### Facilities with Largest Bed Declines\n\n")
        lines.append("| Facility | Zone | Teaching Status | Beds Lost |\n")
        lines.append("|----------|------|----------------|-----------|\n")
        for facility in bc['facilities_with_largest_declines']:
            name = facility.get('facility_name', 'N/A')
            zone = facility.get('zone', 'N/A')
            teaching = facility.get('teaching_status', 'N/A')
            change = facility.get('bed_change', 'N/A')
            lines.append(f"| {name} | {zone} | {teaching} | {change} |\n")
        lines.append("\n")

    return lines


def _ltc_section_lines(ltc):
    """Build the "3. Long-Term Care Capacity Assessment" section."""
    zone = ltc.get('zone', 'N/A')
    city = ltc.get('major_city', 'N/A')
    ratio = _format_or_na(ltc.get('nursing_to_hospital_ratio', 0), ".2f")
    assessment = ltc.get('capacity_assessment', 'N/A')

    lines = [
        "## 3. Long-Term Care Capacity Assessment\n\n",
        f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio}.\n\n",
        f"Long-term care capacity appears **{assessment}** in {city}.\n\n",
    ]

    if 'facility_counts' in ltc:
        lines.append("### Facility Counts\n\n")
        for ftype, count in ltc['facility_counts'].items():
            lines.append(f"- {ftype}: {count}\n")
        lines.append("\n")

    return lines


def format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration):
    """Render the analysis results as a Markdown report.

    Args:
        scenario_text: Scenario description; accepted but not rendered.
        results: Dict that may contain ``facility_distribution``,
            ``bed_capacity`` and ``long_term_care``; each present key adds
            its numbered section.
        recommendations: List of dicts with ``title``, ``description`` and
            ``data_source``.
        ai_integration: Dict with ``title``, ``description``, ``example``
            and ``metrics``.

    Returns:
        The report as a single string. Numeric fields that are missing or
        not numeric are rendered as "N/A".
    """
    parts = ["# Structured Analysis: Healthcare Scenario\n\n"]

    if 'facility_distribution' in results:
        parts.extend(_facility_section_lines(results['facility_distribution']))
    if 'bed_capacity' in results:
        parts.extend(_bed_section_lines(results['bed_capacity']))
    if 'long_term_care' in results:
        parts.extend(_ltc_section_lines(results['long_term_care']))

    # Recommendations Section
    parts.append("## 4. Operational Recommendations\n\n")
    for rec in recommendations:
        parts.append(f"### {rec['title']}\n\n")
        parts.append(f"{rec['description']}\n\n")
        parts.append(f"*Data source: {rec['data_source']}*\n\n")

    # AI Integration Section
    parts.append("## 5. Future Integration for Augmented AI\n\n")
    parts.append(f"### {ai_integration['title']}\n\n")
    parts.append(f"{ai_integration['description']}\n\n")
    parts.append(f"**Example**: {ai_integration['example']}\n\n")
    parts.append("**Key metrics to incorporate**:\n")
    for metric in ai_integration['metrics']:
        parts.append(f"- {metric}\n")
    parts.append("\n")

    # Provenance Section
    parts.append("## Provenance\n\n")
    parts.append("This analysis is based on:\n")
    parts.append("- Scenario description provided by the user\n")
    parts.append("- Uploaded data files\n")
    parts.append("- Calculations performed on the provided data\n")

    return "".join(parts)

def handle_healthcare_scenario(scenario_text, data_registry, history):
    """Run the healthcare analysis pipeline and return the Markdown report.

    Registered files are classified by name: a name containing "facility" or
    "health" is the facilities table, otherwise a name containing "bed" is
    the beds table (the last match of each kind wins). *history* is accepted
    but not used. Any exception is logged and reported as an error string.
    """
    try:
        facilities_df = beds_df = None

        # Pick the input tables out of the registry by file name.
        for registered_name in data_registry.names():
            frame = data_registry.get(registered_name)
            lowered = registered_name.lower()
            if 'facility' in lowered or 'health' in lowered:
                facilities_df = frame
            elif 'bed' in lowered:
                beds_df = frame

        results = {}

        # Facility distribution needs only the facilities table.
        if facilities_df is not None:
            results['facility_distribution'] = analyze_facility_distribution(facilities_df)

        # Bed capacity, then long-term care for the worst-hit zone.
        if beds_df is not None:
            bed_result = analyze_bed_capacity(beds_df)
            results['bed_capacity'] = bed_result

            zone_known = 'zone' in beds_df.columns and 'max_percentage_decrease' in bed_result
            if zone_known:
                worst_zone = bed_result['max_percentage_decrease'].get('zone', '')
                if worst_zone and facilities_df is not None:
                    results['long_term_care'] = assess_long_term_capacity(
                        facilities_df, beds_df, worst_zone
                    )

        recommendations = generate_operational_recommendations(results)
        ai_integration = generate_ai_integration_discussion(results)

        return format_healthcare_analysis_response(
            scenario_text, results, recommendations, ai_integration
        )
    except Exception as e:
        log_event("healthcare_scenario_error", None, {"error": str(e)})
        return f"Error analyzing healthcare scenario: {str(e)}"

# ---------- Model loading helpers ----------
def pick_dtype_and_map():
    """Choose the torch dtype and device map for the available backend.

    Returns ``(float16, "auto")`` with CUDA, ``(float16, {"": "mps"})`` with
    MPS, and ``(float32, "cpu")`` otherwise. CUDA is checked first; MPS is
    only queried when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        if not torch.backends.mps.is_available():
            return torch.float32, "cpu"
        return torch.float16, {"": "mps"}
    return torch.float16, "auto"

@lru_cache(maxsize=1)
def load_local_model():
    """Load and cache the local causal LM and its tokenizer.

    Logs in to the Hugging Face Hub with HF_TOKEN, loads the tokenizer and the
    model identified by MODEL_ID, and returns ``(model, tokenizer)``. If
    loading with a device map fails for any reason, the model is loaded
    without one and moved to CUDA when available, else CPU. The result is
    memoised, so later calls return the same pair.

    Raises:
        RuntimeError: if no Hugging Face token is configured.
    """
    if not HF_TOKEN:
        raise RuntimeError("HUGGINGFACE_HUB_TOKEN is not set.")
    login(token=HF_TOKEN, add_to_git_credential=False)
    dtype, device_map = pick_dtype_and_map()

    def _model_kwargs():
        # Options shared by both model-loading attempts.
        return {
            "token": HF_TOKEN,
            "low_cpu_mem_usage": True,
            "torch_dtype": dtype,
            "trust_remote_code": True,
            "cache_dir": os.environ.get("TRANSFORMERS_CACHE"),
        }

    tokenizer_kwargs = {
        "token": HF_TOKEN,
        "use_fast": True,
        "model_max_length": 8192,
        "padding_side": "left",
        "trust_remote_code": True,
        "cache_dir": os.environ.get("TRANSFORMERS_CACHE"),
    }
    tok = AutoTokenizer.from_pretrained(MODEL_ID, **tokenizer_kwargs)

    try:
        mdl = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, device_map=device_map, **_model_kwargs()
        )
    except Exception:
        # Retry without a device map and place the model manually.
        mdl = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_model_kwargs())
        fallback_device = "cuda" if torch.cuda.is_available() else "cpu"
        mdl.to(fallback_device)

    # Make sure generation has an end-of-sequence id to stop on.
    if mdl.config.eos_token_id is None and tok.eos_token_id is not None:
        mdl.config.eos_token_id = tok.eos_token_id
    return mdl, tok

# ---------- Chat helpers ----------
# Compiled once at import time; searched against stripped, lower-cased text.
_IDENTITY_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\bwho\s+are\s+you\b", r"\bwhat\s+are\s+you\b", r"\bwhat\s+is\s+your\s+name\b",
        r"\bwho\s+is\s+this\b", r"\bidentify\s+yourself\b", r"\btell\s+me\s+about\s+yourself\b",
        r"\bdescribe\s+yourself\b",
        # No trailing \b: "?" is not a word character, so a boundary after it
        # exists only when a word character follows and "and you?" at the end
        # of a message could never match.
        r"\band\s+you\s*\?",
        r"\byour\s+name\b",
        r"\bwho\s+am\s+i\s+chatting\s+with\b",
    )
)


def is_identity_query(message, history):
    """Return True when the user is asking who or what the assistant is.

    Args:
        message: Current user message; non-string values never match.
        history: Prior turns. Only the last entry is inspected, and only when
            it is a non-empty list/tuple whose first element is the user text.

    Returns:
        True if *message*, or the user text of the last history entry,
        matches one of the identity patterns (case-insensitively).
    """
    def _matches(text):
        if not isinstance(text, str):
            return False
        lowered = text.strip().lower()
        return any(pattern.search(lowered) for pattern in _IDENTITY_PATTERNS)

    if _matches(message):
        return True
    if history:
        last_turn = history[-1]
        if isinstance(last_turn, (list, tuple)) and len(last_turn) > 0:
            return _matches(last_turn[0])
    return False

def _iter_user_assistant(history):
    for item in (history or []):
        if isinstance(item, (list, tuple)):
            u = item[0] if len(item) > 0 else ""
            a = item[1] if len(item) > 1 else ""
            yield u, a

def _sanitize_text(s: str) -> str:
    if not isinstance(s, str):
        return s
    return re2.sub(r'[\p{C}--[\n\t]]+', '', s)

def cohere_chat(message, history):
    """Ask the hosted Cohere chat model for a reply.

    Prior turns and the new message are flattened into a single
    ``User:`` / ``Assistant:`` transcript that is sent as one prompt.

    Returns:
        The stripped reply text, or ``None`` when hosted Cohere is disabled,
        the response carries no usable text, or any exception is raised.
    """
    if not USE_HOSTED_COHERE:
        return None
    try:
        client = cohere.Client(api_key=COHERE_API_KEY)

        transcript = []
        for user_text, assistant_text in _iter_user_assistant(history):
            if user_text:
                transcript.append(f"User: {user_text}")
            if assistant_text:
                transcript.append(f"Assistant: {assistant_text}")
        transcript.append(f"User: {message}")

        resp = client.chat(
            model="command-r7b-12-2024",
            message="\n".join(transcript) + "\nAssistant:",
            temperature=0.3,
            max_tokens=MAX_NEW_TOKENS,
        )

        # Different response shapes expose the text under different attributes;
        # take the first non-empty one.
        for attr in ("text", "reply"):
            candidate = getattr(resp, attr, None)
            if candidate:
                return candidate.strip()
        generations = getattr(resp, "generations", None)
        if generations:
            return generations[0].text.strip()
        return None
    except Exception:
        # Best-effort: any failure means "no hosted answer available".
        return None

def build_inputs(tokenizer, message, history):
    """Build tokenized model inputs from the conversation.

    The conversation starts with the ``SYSTEM_MASTER`` system prompt, followed
    by every non-empty user/assistant entry from ``history`` and finally the
    new user ``message``.  The result of ``tokenizer.apply_chat_template`` is
    returned (tokenized, with a generation prompt, as PyTorch tensors).
    """
    conversation = [{"role": "system", "content": SYSTEM_MASTER}]
    for user_text, assistant_text in _iter_user_assistant(history):
        for role, content in (("user", user_text), ("assistant", assistant_text)):
            if content:
                conversation.append({"role": role, "content": content})
    conversation.append({"role": "user", "content": message})

    return tokenizer.apply_chat_template(
        conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )

def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
    """Sample a completion from the local model and return it as text.

    ``input_ids`` is moved to the model's device, generation runs without
    gradient tracking, and only the tokens produced after the prompt are
    decoded (special tokens skipped, surrounding whitespace stripped).
    """
    prompt_ids = input_ids.to(model.device)
    eos_id = tokenizer.eos_token_id
    sampling = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.15,
        pad_token_id=eos_id,  # the EOS id doubles as the padding id
        eos_token_id=eos_id,
    )
    with torch.no_grad():
        sequences = model.generate(input_ids=prompt_ids, **sampling)

    # Drop the echoed prompt; keep only the newly generated tail.
    prompt_len = prompt_ids.shape[-1]
    completion = sequences[0, prompt_len:]
    decoded = tokenizer.decode(completion, skip_special_tokens=True)
    return decoded.strip()

# ---------- Core chat logic ----------
def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
    """Produce the assistant's reply for one user turn.

    Processing order: input safety filter, identity-question shortcut,
    ingestion of uploaded files, healthcare-scenario handling, and finally a
    generic fallback reply.

    Args:
        user_msg: The user's message text.
        history: Prior ``(user, assistant)`` turns; ``None`` is treated as an
            empty history.
        tz: Not used by this function.
        uploaded_files_paths: Paths of uploaded files; when non-empty they
            are loaded into a fresh ``DataRegistry``.
        awaiting_answers: Flag passed through unchanged on every path except
            the healthcare-scenario path, which returns ``False``.

    Returns:
        tuple: ``(new_history, awaiting_flag)`` where ``new_history`` is a new
        list with ``(user_msg, reply)`` appended.  Exceptions raised while
        building the reply are reported as an ``"Error: ..."`` reply rather
        than propagated.
    """
    # Normalise once so every return path, including the error handler below,
    # can safely append.  Previously a None history made the handler itself
    # raise TypeError.
    history = list(history or [])
    try:
        log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})

        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            ans = refusal_reply(reason_in)
            return history + [(user_msg, ans)], awaiting_answers

        if is_identity_query(safe_in, history):
            ans = "I am an AI analytical system designed to help you analyze healthcare scenarios and make data-driven decisions."
            return history + [(user_msg, ans)], awaiting_answers

        # Initialize data registry and session RAG
        data_registry = DataRegistry()
        session_rag = SessionRAG()

        # Process uploaded files
        if uploaded_files_paths:
            process_healthcare_data(uploaded_files_paths, data_registry)

            # Update session RAG with CSV columns.
            # NOTE(review): each CSV overwrites the previous columns, and
            # session_rag is not read again in this function -- confirm
            # whether it is still needed.
            for file_name in data_registry.names():
                if file_name.endswith('.csv'):
                    df = data_registry.get(file_name)
                    session_rag.csv_columns = list(df.columns)

        # Check if this is a healthcare scenario
        if is_healthcare_scenario(safe_in, uploaded_files_paths):
            # Handle healthcare scenario directly
            response = handle_healthcare_scenario(safe_in, data_registry, history)
            return history + [(user_msg, response)], False

        # Non-healthcare input: only a generic fallback reply is implemented.
        response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze."
        return history + [(user_msg, response)], awaiting_answers

    except Exception as e:
        err = f"Error: {e}"
        try:
            traceback.print_exc()
        except Exception:
            # Logging the traceback is best-effort; never mask the reply.
            pass
        return history + [(user_msg, err)], awaiting_answers

# ---------- UI Setup ----------
# Gradio theme handed to gr.Blocks(theme=...) below.
theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
# Extra stylesheet handed to gr.Blocks(css=...) below.  The selectors target
# the elem_id / elem_classes values assigned in the layout (hero-wrap, hero,
# search-row, hero-box, hero-send, chat-container, chat-input-row, chat-msg,
# chat-send, chat-clear).
# NOTE(review): "#hero h2" uses color #0f172a, the same value as --brand-bg,
# which is the .gradio-container background -- the heading may be hard to see;
# confirm against the rendered page.
custom_css = """
:root { --brand-bg: #0f172a; --brand-accent: #0d9488; --brand-text: #0f172a; --brand-text-light: #ffffff; }
html, body, .gradio-container { height: 100vh; }
.gradio-container { background: var(--brand-bg); display: flex; flex-direction: column; }
/* HERO (landing) */
#hero-wrap { height: 70vh; display: grid; place-items: center; }
#hero { text-align: center; }
#hero h2 { color: #0f172a; font-weight: 800; font-size: 32px; margin-bottom: 22px; }
#hero .search-row { width: min(860px, 92vw); margin: 0 auto; display: flex; gap: 8px; align-items: stretch; }
#hero .search-row .hero-box { flex: 1 1 auto; }
#hero .search-row .hero-box textarea { height: 52px !important; }
#hero-send > button { height: 52px !important; padding: 0 18px !important; border-radius: 12px !important; }
#hero .hint { color: #334155; margin-top: 10px; font-size: 13px; opacity: 0.9; }
/* CHAT */
#chat-container { position: relative; }
.chatbot header, .chatbot .label, .chatbot .label-wrap { display: none !important; }
.message.user, .message.bot { background: var(--brand-accent) !important; color: var(--brand-text-light) !important; border-radius: 12px !important; padding: 8px 12px !important; }
textarea, input, .gr-input { border-radius: 12px !important; }
/* Chat input row equal heights */
#chat-input-row { align-items: stretch; }
#chat-msg textarea { height: 52px !important; }
#chat-send > button, #chat-clear > button { height: 52px !important; padding: 0 18px !important; border-radius: 12px !important; }
"""

# ---------- Main App ----------
# Two-screen UI: a landing "hero" prompt that is swapped for the chat view on
# the first message, plus the event wiring that feeds clarityops_reply().
with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
    # --- HERO (initial screen) ---
    with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap:
        with gr.Column(elem_id="hero"):
            gr.HTML("<h2>What healthcare scenario can I help you analyze?</h2>")
            with gr.Row(elem_classes="search-row"):
                hero_msg = gr.Textbox(
                    placeholder="Describe your healthcare scenario or upload data files for analysis…",
                    show_label=False,
                    lines=1,
                    elem_classes="hero-box"
                )
                hero_send = gr.Button("➤", scale=0, elem_id="hero-send")
            gr.Markdown('<div class="hint">Upload healthcare data files (CSV, PDF, etc.) and describe your scenario for comprehensive analysis.</div>')

    # --- MAIN APP (hidden until first message) ---
    with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
        chat = gr.Chatbot(label="", show_label=False, height="80vh")
        with gr.Row():
            uploads = gr.Files(
                label="Upload healthcare data files",
                file_types=["file"], file_count="multiple", height=68
            )
        with gr.Row(elem_id="chat-input-row"):
            msg = gr.Textbox(
                label="",
                show_label=False,
                placeholder="Continue the conversation. Provide additional details or answer clarifying questions.",
                scale=10,
                elem_id="chat-msg",
                lines=1,
            )
            send = gr.Button("Send", scale=1, elem_id="chat-send")
            clear = gr.Button("Clear chat", scale=1, elem_id="chat-clear")

    # ---- State (per-session values carried between events)
    state_history = gr.State(value=[])      # list of (user, assistant) turns
    state_uploaded = gr.State(value=[])     # accumulated upload paths
    state_awaiting = gr.State(value=False)  # flag round-tripped through clarityops_reply

    # ---- Uploads
    def _store_uploads(files, current):
        """Merge newly reported upload paths into the accumulated list.

        Each entry of ``files`` is either an object exposing ``.name`` or the
        path itself.  Paths already present in ``current`` are not added
        again: the change event reports the component's whole file list, so
        a plain append would duplicate files that are still attached.
        """
        merged = list(current or [])
        for f in (files or []):
            path = getattr(f, "name", None) or f
            if path not in merged:
                merged.append(path)
        return merged

    uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)

    # ---- Core send (used by both hero input and chat input)
    def _on_send(user_msg, history, up_paths, awaiting):
        """Handle one submitted message.

        Returns ``(chatbot_value, textbox_value, history_state, awaiting)``.
        The textbox value is always ``""`` so the input is cleared.  Blank
        messages leave the history untouched; exceptions are shown to the
        user as an ``"Error: ..."`` reply.
        """
        try:
            if not user_msg or not user_msg.strip():
                return history, "", history, awaiting
            new_history, new_awaiting = clarityops_reply(
                user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting
            )
            return new_history, "", new_history, new_awaiting
        except Exception as e:
            err = f"Error: {e}"
            try:
                traceback.print_exc()
            except Exception:
                # Traceback logging is best-effort only.
                pass
            new_hist = (history or []) + [(user_msg or "", err)]
            return new_hist, "", new_hist, awaiting

    # ---- Hero -> App transition + first send
    def _hero_start(user_msg, history, up_paths, awaiting):
        """Send the first message, then hide the hero and show the chat view."""
        chat_o, msg_o, hist_o, await_o = _on_send(user_msg, history, up_paths, awaiting)
        return (
            chat_o, msg_o, hist_o, await_o,
            gr.update(visible=False),  # hero_wrap
            gr.update(visible=True),   # app_wrap
            ""                         # clear hero_msg
        )

    hero_send.click(
        _hero_start,
        inputs=[hero_msg, state_history, state_uploaded, state_awaiting],
        outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg],
        concurrency_limit=2, queue=True
    )
    hero_msg.submit(
        _hero_start,
        inputs=[hero_msg, state_history, state_uploaded, state_awaiting],
        outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg],
        concurrency_limit=2, queue=True
    )

    # ---- Normal chat interactions after hero is gone
    send.click(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting],
               outputs=[chat, msg, state_history, state_awaiting],
               concurrency_limit=2, queue=True)
    msg.submit(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting],
               outputs=[chat, msg, state_history, state_awaiting],
               concurrency_limit=2, queue=True)

    def _on_clear():
        """Reset the conversation and return to the hero screen.

        NOTE(review): the uploaded-files state is not reset here -- confirm
        that keeping uploads across "Clear chat" is intended.
        """
        return (
            [], "", [], False,
            gr.update(visible=True),   # hero_wrap
            gr.update(visible=False),  # app_wrap
            ""                         # hero_msg
        )

    clear.click(_on_clear, None, [chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg])

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.getenv("PORT", "7860"))
    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        show_api=False,
        max_threads=40,
    )