Rajan Sharma committed on
Commit
ff957d1
·
verified ·
1 Parent(s): ef17f73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -460
app.py CHANGED
@@ -10,11 +10,16 @@ import torch
10
  import regex as re2
11
 
12
  # Import necessary modules
13
- from settings import SNAPSHOT_PATH, PERSIST_CONTENT, HEALTHCARE_SETTINGS, MODEL_SETTINGS
 
 
 
14
  from audit_log import log_event, hash_summary
15
  from privacy import redact_text, safety_filter, refusal_reply
16
  from data_registry import DataRegistry
17
  from upload_ingest import extract_text_from_files
 
 
18
 
19
  # ---------- Writable caches (HF Spaces-safe) ----------
20
  HOME = pathlib.Path.home()
@@ -57,22 +62,6 @@ COHERE_API_KEY = os.getenv("COHERE_API_KEY")
57
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
58
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", MODEL_SETTINGS.get("max_new_tokens", 2048)))
59
 
60
- # ---------- Generic System Prompt ----------
61
- SYSTEM_MASTER = """
62
- SYSTEM ROLE
63
- You are an AI analytical system that provides data-driven insights for any scenario.
64
- Absolute rules:
65
- - Use ONLY information provided in this conversation (scenario text + uploaded files + user answers).
66
- - Never invent data. If something required is missing after clarifications, write the literal token: INSUFFICIENT_DATA.
67
- - Provide clear analysis with calculations, evidence, and reasoning.
68
- - Maintain privacy safeguards (aggregate data; suppress small cohorts <10).
69
- - Adapt your analysis approach to the specific scenario and data provided.
70
- Formatting rules for structured analysis:
71
- - Start with the header: "Structured Analysis"
72
- - Organize analysis into logical sections based on the scenario requirements
73
- - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
74
- """.strip()
75
-
76
  # ---------- Helper Functions ----------
77
  def find_column(df, patterns):
78
  """Find the first column in df that matches any of the patterns."""
@@ -193,447 +182,18 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
193
  "error": str(e)
194
  })
195
 
196
- def analyze_facility_distribution(facilities_df):
197
- """Analyze healthcare facility distribution dynamically."""
198
- try:
199
- # Validate input
200
- if facilities_df is None or facilities_df.empty:
201
- return {"error": "No facility data provided"}
202
-
203
- # Find province column
204
- province_col = find_column(facilities_df, ['province', 'state', 'territory'])
205
- if province_col:
206
- alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
207
- ab_facilities = facilities_df[alberta_mask].copy()
208
- else:
209
- ab_facilities = facilities_df.copy()
210
-
211
- # Find facility type column
212
- type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
213
- if not type_col:
214
- return {"error": "Facility type column not found"}
215
-
216
- # Facility type frequency
217
- type_counts = ab_facilities[type_col].value_counts().to_dict()
218
-
219
- # Find city column
220
- city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
221
- if city_col:
222
- city_counts = ab_facilities[city_col].value_counts().head(5)
223
- top_cities = city_counts.index.tolist()
224
-
225
- # Breakdown by facility type for top cities
226
- city_breakdown = {}
227
- for city in top_cities:
228
- city_data = ab_facilities[ab_facilities[city_col] == city]
229
- city_breakdown[city] = city_data[type_col].value_counts().to_dict()
230
- else:
231
- top_cities = []
232
- city_breakdown = {}
233
-
234
- return {
235
- "total_facilities": len(ab_facilities),
236
- "type_distribution": type_counts,
237
- "top_cities": top_cities,
238
- "city_breakdown": city_breakdown,
239
- "columns_used": {
240
- "facility_type": type_col,
241
- "city": city_col,
242
- "province": province_col
243
- }
244
- }
245
- except Exception as e:
246
- log_event("facility_analysis_error", None, {"error": str(e)})
247
- return {"error": str(e)}
248
-
249
- def analyze_bed_capacity(beds_df):
250
- """Analyze bed capacity dynamically."""
251
- try:
252
- # Validate input
253
- if beds_df is None or beds_df.empty:
254
- return {"error": "No bed data provided"}
255
-
256
- # Find required columns
257
- current_col = find_column(beds_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
258
- prev_col = find_column(beds_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
259
-
260
- if not current_col or not prev_col:
261
- return {"error": f"Missing required columns. Found current: {current_col}, prev: {prev_col}"}
262
-
263
- # Ensure derived columns exist
264
- if 'bed_change' not in beds_df.columns:
265
- beds_df['bed_change'] = beds_df[current_col] - beds_df[prev_col]
266
-
267
- if 'percent_change' not in beds_df.columns:
268
- beds_df['percent_change'] = beds_df.apply(
269
- lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
270
- axis=1
271
- )
272
-
273
- # Filter to Alberta if province column exists
274
- province_col = find_column(beds_df, ['province', 'state', 'territory'])
275
- if province_col:
276
- alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
277
- ab_beds = beds_df[alberta_mask].copy()
278
- else:
279
- ab_beds = beds_df.copy()
280
-
281
- # Calculate zone-level summaries if zone column exists
282
- zone_col = find_column(beds_df, ['zone', 'region', 'area', 'district'])
283
- if zone_col:
284
- zone_summary = ab_beds.groupby(zone_col).agg({
285
- current_col: 'sum',
286
- prev_col: 'sum',
287
- 'bed_change': 'sum'
288
- }).reset_index()
289
-
290
- zone_summary['percent_change'] = zone_summary.apply(
291
- lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
292
- axis=1
293
- )
294
-
295
- # Find zones with largest changes
296
- if len(zone_summary) > 0:
297
- max_abs_decrease_idx = zone_summary['bed_change'].idxmin()
298
- max_pct_decrease_idx = zone_summary['percent_change'].idxmin()
299
-
300
- max_abs_decrease = zone_summary.loc[max_abs_decrease_idx]
301
- max_pct_decrease = zone_summary.loc[max_pct_decrease_idx]
302
- else:
303
- max_abs_decrease = {}
304
- max_pct_decrease = {}
305
-
306
- # Identify facilities with largest declines
307
- facilities_decline = ab_beds.sort_values('bed_change').head(5)
308
- else:
309
- zone_summary = pd.DataFrame()
310
- max_abs_decrease = {}
311
- max_pct_decrease = {}
312
- facilities_decline = pd.DataFrame()
313
-
314
- return {
315
- "zone_summary": zone_summary.to_dict('records') if not zone_summary.empty else [],
316
- "max_absolute_decrease": max_abs_decrease.to_dict() if isinstance(max_abs_decrease, pd.Series) else max_abs_decrease,
317
- "max_percentage_decrease": max_pct_decrease.to_dict() if isinstance(max_pct_decrease, pd.Series) else max_pct_decrease,
318
- "facilities_with_largest_declines": facilities_decline.to_dict('records') if not facilities_decline.empty else [],
319
- "columns_used": {
320
- "beds_current": current_col,
321
- "beds_prev": prev_col,
322
- "zone": zone_col,
323
- "province": province_col
324
- }
325
- }
326
- except Exception as e:
327
- log_event("bed_analysis_error", None, {"error": str(e)})
328
- return {"error": str(e)}
329
-
330
- def assess_long_term_capacity(facilities_df, beds_df, zone_name):
331
- """Assess long-term care capacity dynamically."""
332
- try:
333
- # Validate inputs
334
- if facilities_df is None or facilities_df.empty:
335
- return {"error": "No facility data provided"}
336
-
337
- # Find relevant columns
338
- zone_col = find_column(facilities_df, ['zone', 'region', 'area', 'district'])
339
- city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
340
- type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
341
-
342
- if not type_col:
343
- return {"error": "Facility type column not found"}
344
-
345
- # Get facilities in the specified zone
346
- if zone_col:
347
- zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
348
- else:
349
- # If zone column not available, use province
350
- province_col = find_column(facilities_df, ['province', 'state', 'territory'])
351
- if province_col:
352
- alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
353
- zone_facilities = facilities_df[alberta_mask].copy()
354
- else:
355
- zone_facilities = facilities_df.copy()
356
-
357
- # Find major city in zone
358
- if city_col:
359
- city_counts = zone_facilities[city_col].value_counts()
360
- if len(city_counts) > 0:
361
- major_city = city_counts.index[0]
362
- city_facilities = zone_facilities[zone_facilities[city_col] == major_city]
363
-
364
- # Count facility types
365
- facility_counts = city_facilities[type_col].value_counts().to_dict()
366
-
367
- # Calculate ratio of nursing/residential to hospitals
368
- hospitals = sum(count for key, count in facility_counts.items() if 'hospital' in key.lower())
369
- nursing = sum(count for key, count in facility_counts.items() if any(word in key.lower() for word in ['nursing', 'residential', 'care']))
370
- ratio = nursing / hospitals if hospitals > 0 else 0
371
-
372
- # Assess capacity
373
- capacity_assessment = "sufficient" if ratio >= 1.5 else "insufficient"
374
-
375
- return {
376
- "zone": zone_name,
377
- "major_city": major_city,
378
- "facility_counts": facility_counts,
379
- "nursing_to_hospital_ratio": ratio,
380
- "capacity_assessment": capacity_assessment,
381
- "columns_used": {
382
- "zone": zone_col,
383
- "city": city_col,
384
- "facility_type": type_col
385
- }
386
- }
387
-
388
- return {"error": "Could not determine major city or facility counts"}
389
- except Exception as e:
390
- log_event("ltc_assessment_error", None, {"error": str(e)})
391
- return {"error": str(e)}
392
-
393
- def generate_operational_recommendations(analysis_results):
394
- """Generate data-driven operational recommendations."""
395
- recommendations = []
396
-
397
- # Recommendation 1: Address bed capacity issues
398
- if 'bed_capacity' in analysis_results:
399
- bed_data = analysis_results['bed_capacity']
400
- if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
401
- zone_col = bed_data.get('columns_used', {}).get('zone')
402
- zone = bed_data['max_percentage_decrease'].get(zone_col, '') if zone_col else ''
403
- decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
404
- if zone and decrease:
405
- recommendations.append({
406
- "title": f"Restore staffed beds in {zone} Zone",
407
- "description": f"Priority should be given to reopening closed units and hiring staff to address the {decrease:.1f}% decrease in bed capacity.",
408
- "data_source": "Bed capacity analysis"
409
- })
410
-
411
- # Recommendation 2: Expand long-term care capacity
412
- if 'long_term_care' in analysis_results:
413
- ltc_data = analysis_results['long_term_care']
414
- if ltc_data.get('capacity_assessment') == 'insufficient':
415
- city = ltc_data.get('major_city', '')
416
- if city:
417
- recommendations.append({
418
- "title": f"Expand long-term care capacity in {city}",
419
- "description": f"Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.",
420
- "data_source": "Long-term care capacity assessment"
421
- })
422
-
423
- # Recommendation 3: Implement surge plans
424
- if 'bed_capacity' in analysis_results:
425
- recommendations.append({
426
- "title": "Implement surge capacity plans",
427
- "description": "Develop modular units and activate staffing pools to handle unpredictable spikes in demand.",
428
- "data_source": "Bed capacity trends"
429
- })
430
-
431
- return recommendations
432
-
433
- def generate_ai_integration_discussion(analysis_results):
434
- """Generate discussion on future AI integration for healthcare operations."""
435
- return {
436
- "title": "Future Integration for Augmented Decision-Making",
437
- "description": "Combining facility information with operational data like emergency department wait times and disease surveillance can enable AI-driven resource optimization.",
438
- "example": "A model could ingest current ED wait times, hospital occupancy, and community case counts to forecast bed demand by zone and recommend redirecting ambulances to facilities with spare capacity.",
439
- "metrics": ["Hospital occupancy rates", "ED wait times", "Disease surveillance data"]
440
- }
441
-
442
- def format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration):
443
- """Format the healthcare analysis response with tables and sections."""
444
- response = "# Structured Analysis: Healthcare Scenario\n\n"
445
-
446
- # Extract tasks from scenario to ensure we address all requirements
447
- tasks = extract_scenario_tasks(scenario_text)
448
-
449
- # Data Preparation Section
450
- if 'facility_distribution' in results:
451
- fd = results['facility_distribution']
452
- if 'error' in fd:
453
- response += "## 1. Data Preparation\n\n"
454
- response += f"Error in facility distribution analysis: {fd['error']}\n\n"
455
- else:
456
- response += "## 1. Data Preparation\n\n"
457
- response += f"Total healthcare facilities: {fd.get('total_facilities', 'N/A')}\n\n"
458
-
459
- if 'type_distribution' in fd and isinstance(fd['type_distribution'], dict):
460
- response += "### Facility Type Distribution\n\n"
461
- for ftype, count in fd['type_distribution'].items():
462
- response += f"- {ftype}: {count}\n"
463
- response += "\n"
464
-
465
- if 'city_breakdown' in fd and isinstance(fd['city_breakdown'], dict):
466
- response += "### Top Cities by Facility Count\n\n"
467
- response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
468
- response += "|------|-----------|-------------------|------------|-------|\n"
469
-
470
- for city, breakdown in fd['city_breakdown'].items():
471
- hospitals = breakdown.get('Hospitals', 0)
472
- nursing = breakdown.get('Nursing and residential care facilities', 0)
473
- ambulatory = breakdown.get('Ambulatory health care services', 0)
474
- total = hospitals + nursing + ambulatory
475
- response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
476
- response += "\n"
477
-
478
- # Bed Capacity Analysis Section
479
- if 'bed_capacity' in results:
480
- bc = results['bed_capacity']
481
- if 'error' in bc:
482
- response += "## 2. Bed Capacity Analysis\n\n"
483
- response += f"Error in bed capacity analysis: {bc['error']}\n\n"
484
- else:
485
- response += "## 2. Bed Capacity Analysis\n\n"
486
-
487
- if 'zone_summary' in bc and bc['zone_summary']:
488
- response += "### Bed Capacity by Zone\n\n"
489
- response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
490
- response += "|------|---------------|-----------------|-----------------|----------------|\n"
491
-
492
- zone_col = bc.get('columns_used', {}).get('zone')
493
- current_col = bc.get('columns_used', {}).get('beds_current')
494
- prev_col = bc.get('columns_used', {}).get('beds_prev')
495
-
496
- for zone_data in bc['zone_summary']:
497
- zone = zone_data.get(zone_col, 'N/A') if zone_col else 'N/A'
498
- current = zone_data.get(current_col, 'N/A') if current_col else 'N/A'
499
- prev = zone_data.get(prev_col, 'N/A') if prev_col else 'N/A'
500
- change = zone_data.get('bed_change', 'N/A')
501
- pct = zone_data.get('percent_change', 'N/A')
502
- response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
503
- response += "\n"
504
-
505
- if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
506
- 'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
507
- zone_col = bc.get('columns_used', {}).get('zone')
508
- abs_dec = bc['max_absolute_decrease']
509
- pct_dec = bc['max_percentage_decrease']
510
- response += f"**Zone with largest absolute decrease**: {abs_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
511
- response += f"**Zone with largest percentage decrease**: {pct_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
512
-
513
- if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
514
- response += "### Facilities with Largest Bed Declines\n\n"
515
- response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
516
- response += "|----------|------|----------------|-----------|\n"
517
-
518
- zone_col = bc.get('columns_used', {}).get('zone')
519
- for facility in bc['facilities_with_largest_declines']:
520
- name = facility.get('facility_name', 'N/A')
521
- zone = facility.get(zone_col, 'N/A') if zone_col else 'N/A'
522
- teaching = facility.get('teaching_status', 'N/A')
523
- change = facility.get('bed_change', 'N/A')
524
- response += f"| {name} | {zone} | {teaching} | {change} |\n"
525
- response += "\n"
526
-
527
- # Long-term Care Section
528
- if 'long_term_care' in results:
529
- ltc = results['long_term_care']
530
- if 'error' in ltc:
531
- response += "## 3. Long-Term Care Capacity Assessment\n\n"
532
- response += f"Error in long-term care assessment: {ltc['error']}\n\n"
533
- else:
534
- response += "## 3. Long-Term Care Capacity Assessment\n\n"
535
-
536
- zone = ltc.get('zone', 'N/A')
537
- city = ltc.get('major_city', 'N/A')
538
- ratio = ltc.get('nursing_to_hospital_ratio', 0)
539
- assessment = ltc.get('capacity_assessment', 'N/A')
540
-
541
- response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
542
- response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
543
-
544
- if 'facility_counts' in ltc and isinstance(ltc['facility_counts'], dict):
545
- response += "### Facility Counts\n\n"
546
- for ftype, count in ltc['facility_counts'].items():
547
- response += f"- {ftype}: {count}\n"
548
- response += "\n"
549
-
550
- # Operational Recommendations Section
551
- response += "## 4. Operational Recommendations\n\n"
552
- if recommendations:
553
- for rec in recommendations:
554
- response += f"### {rec['title']}\n\n"
555
- response += f"{rec['description']}\n\n"
556
- response += f"*Data source: {rec['data_source']}*\n\n"
557
- else:
558
- response += "No specific recommendations could be generated due to data limitations.\n\n"
559
-
560
- # AI Integration Section
561
- response += "## 5. Future Integration for Augmented AI\n\n"
562
- response += f"### {ai_integration['title']}\n\n"
563
- response += f"{ai_integration['description']}\n\n"
564
- response += f"**Example**: {ai_integration['example']}\n\n"
565
- response += "**Key metrics to incorporate**:\n"
566
- for metric in ai_integration['metrics']:
567
- response += f"- {metric}\n"
568
- response += "\n"
569
-
570
- # Provenance Section
571
- response += "## Provenance\n\n"
572
- response += "This analysis is based on:\n"
573
- response += "- Scenario description provided by the user\n"
574
- response += "- Uploaded data files\n"
575
- response += "- Calculations performed on the provided data\n"
576
-
577
- return response
578
-
579
  def handle_healthcare_scenario(scenario_text, data_registry, history):
580
- """Handle healthcare scenarios dynamically with explicit task following."""
581
  try:
582
- results = {}
 
583
 
584
- # Extract tasks from scenario to ensure we address all requirements
585
- tasks = extract_scenario_tasks(scenario_text)
586
- print(f"Extracted tasks: {tasks}")
587
 
588
- # Dynamically identify relevant files
589
- facility_files = data_registry.get_data_by_type('facility_data')
590
- bed_files = data_registry.get_data_by_type('bed_data')
591
-
592
- # Use the first file of each type (can be enhanced to use multiple)
593
- facilities_df = None
594
- if facility_files:
595
- facilities_df = data_registry.get(facility_files[0])
596
-
597
- beds_df = None
598
- if bed_files:
599
- beds_df = data_registry.get(bed_files[0])
600
-
601
- # Log what we found
602
- log_event("data_files_found", None, {
603
- "facilities": facilities_df is not None,
604
- "beds": beds_df is not None,
605
- "facility_files": facility_files,
606
- "bed_files": bed_files
607
- })
608
-
609
- # Task 1: Data preparation (facility distribution)
610
- if facilities_df is not None:
611
- results['facility_distribution'] = analyze_facility_distribution(facilities_df)
612
-
613
- # Task 2: Bed capacity analysis
614
- if beds_df is not None:
615
- results['bed_capacity'] = analyze_bed_capacity(beds_df)
616
-
617
- # Task 3: Long-term care capacity assessment
618
- if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
619
- zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
620
- if zone_col:
621
- worst_zone = results['bed_capacity']['max_percentage_decrease'].get(zone_col, '')
622
- if worst_zone and facilities_df is not None:
623
- results['long_term_care'] = assess_long_term_capacity(
624
- facilities_df,
625
- beds_df,
626
- worst_zone
627
- )
628
-
629
- # Generate operational recommendations (Task 4.1)
630
- recommendations = generate_operational_recommendations(results)
631
-
632
- # Generate AI integration discussion (Task 4.2)
633
- ai_integration = generate_ai_integration_discussion(results)
634
-
635
- # Format response ensuring all tasks are addressed
636
- response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
637
 
638
  return response
639
  except Exception as e:
@@ -727,8 +287,8 @@ def cohere_chat(message, history):
727
  except Exception:
728
  return None
729
 
730
- def build_inputs(tokenizer, message, history):
731
- msgs = [{"role": "system", "content": SYSTEM_MASTER}]
732
  for u, a in _iter_user_assistant(history):
733
  if u: msgs.append({"role": "user", "content": u})
734
  if a: msgs.append({"role": "assistant", "content": a})
@@ -792,8 +352,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
792
  response = handle_healthcare_scenario(safe_in, data_registry, history)
793
  return history + [(user_msg, response)], False
794
  else:
795
- # General conversation mode
796
- # Try Cohere first if available
797
  if USE_HOSTED_COHERE:
798
  out = cohere_chat(safe_in, history)
799
  if out:
@@ -808,10 +367,12 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
808
  })
809
  return history + [(user_msg, safe_out)], False
810
 
811
- # Fall back to local model
812
  try:
813
  model, tokenizer = load_local_model()
814
- inputs = build_inputs(tokenizer, safe_in, history)
 
 
815
  out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
816
 
817
  if isinstance(out, str):
 
10
  import regex as re2
11
 
12
  # Import necessary modules
13
+ from settings import (
14
+ SNAPSHOT_PATH, PERSIST_CONTENT, HEALTHCARE_SETTINGS, MODEL_SETTINGS,
15
+ HEALTHCARE_SYSTEM_PROMPT, GENERAL_CONVERSATION_PROMPT
16
+ )
17
  from audit_log import log_event, hash_summary
18
  from privacy import redact_text, safety_filter, refusal_reply
19
  from data_registry import DataRegistry
20
  from upload_ingest import extract_text_from_files
21
+ from healthcare_analysis import HealthcareAnalyzer
22
+ from response_formatter import ResponseFormatter
23
 
24
  # ---------- Writable caches (HF Spaces-safe) ----------
25
  HOME = pathlib.Path.home()
 
62
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
63
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", MODEL_SETTINGS.get("max_new_tokens", 2048)))
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  # ---------- Helper Functions ----------
66
  def find_column(df, patterns):
67
  """Find the first column in df that matches any of the patterns."""
 
182
  "error": str(e)
183
  })
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def handle_healthcare_scenario(scenario_text, data_registry, history):
186
+ """Handle healthcare scenarios with enhanced analysis"""
187
  try:
188
+ # Initialize analyzer
189
+ analyzer = HealthcareAnalyzer(data_registry)
190
 
191
+ # Perform comprehensive analysis
192
+ results = analyzer.comprehensive_analysis(scenario_text)
 
193
 
194
+ # Format response
195
+ formatter = ResponseFormatter()
196
+ response = formatter.format_healthcare_response(scenario_text, results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  return response
199
  except Exception as e:
 
287
  except Exception:
288
  return None
289
 
290
+ def build_inputs(tokenizer, message, history, system_prompt):
291
+ msgs = [{"role": "system", "content": system_prompt}]
292
  for u, a in _iter_user_assistant(history):
293
  if u: msgs.append({"role": "user", "content": u})
294
  if a: msgs.append({"role": "assistant", "content": a})
 
352
  response = handle_healthcare_scenario(safe_in, data_registry, history)
353
  return history + [(user_msg, response)], False
354
  else:
355
+ # General conversation mode with enhanced handling
 
356
  if USE_HOSTED_COHERE:
357
  out = cohere_chat(safe_in, history)
358
  if out:
 
367
  })
368
  return history + [(user_msg, safe_out)], False
369
 
370
+ # Enhanced local model generation
371
  try:
372
  model, tokenizer = load_local_model()
373
+
374
+ # Use general conversation prompt
375
+ inputs = build_inputs(tokenizer, safe_in, history, GENERAL_CONVERSATION_PROMPT)
376
  out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
377
 
378
  if isinstance(out, str):