Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,11 +10,16 @@ import torch
|
|
| 10 |
import regex as re2
|
| 11 |
|
| 12 |
# Import necessary modules
|
| 13 |
-
from settings import
|
|
|
|
|
|
|
|
|
|
| 14 |
from audit_log import log_event, hash_summary
|
| 15 |
from privacy import redact_text, safety_filter, refusal_reply
|
| 16 |
from data_registry import DataRegistry
|
| 17 |
from upload_ingest import extract_text_from_files
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# ---------- Writable caches (HF Spaces-safe) ----------
|
| 20 |
HOME = pathlib.Path.home()
|
|
@@ -57,22 +62,6 @@ COHERE_API_KEY = os.getenv("COHERE_API_KEY")
|
|
| 57 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 58 |
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", MODEL_SETTINGS.get("max_new_tokens", 2048)))
|
| 59 |
|
| 60 |
-
# ---------- Generic System Prompt ----------
|
| 61 |
-
SYSTEM_MASTER = """
|
| 62 |
-
SYSTEM ROLE
|
| 63 |
-
You are an AI analytical system that provides data-driven insights for any scenario.
|
| 64 |
-
Absolute rules:
|
| 65 |
-
- Use ONLY information provided in this conversation (scenario text + uploaded files + user answers).
|
| 66 |
-
- Never invent data. If something required is missing after clarifications, write the literal token: INSUFFICIENT_DATA.
|
| 67 |
-
- Provide clear analysis with calculations, evidence, and reasoning.
|
| 68 |
-
- Maintain privacy safeguards (aggregate data; suppress small cohorts <10).
|
| 69 |
-
- Adapt your analysis approach to the specific scenario and data provided.
|
| 70 |
-
Formatting rules for structured analysis:
|
| 71 |
-
- Start with the header: "Structured Analysis"
|
| 72 |
-
- Organize analysis into logical sections based on the scenario requirements
|
| 73 |
-
- End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers.
|
| 74 |
-
""".strip()
|
| 75 |
-
|
| 76 |
# ---------- Helper Functions ----------
|
| 77 |
def find_column(df, patterns):
|
| 78 |
"""Find the first column in df that matches any of the patterns."""
|
|
@@ -193,447 +182,18 @@ def process_healthcare_data(uploaded_files_paths, data_registry):
|
|
| 193 |
"error": str(e)
|
| 194 |
})
|
| 195 |
|
| 196 |
-
def analyze_facility_distribution(facilities_df):
|
| 197 |
-
"""Analyze healthcare facility distribution dynamically."""
|
| 198 |
-
try:
|
| 199 |
-
# Validate input
|
| 200 |
-
if facilities_df is None or facilities_df.empty:
|
| 201 |
-
return {"error": "No facility data provided"}
|
| 202 |
-
|
| 203 |
-
# Find province column
|
| 204 |
-
province_col = find_column(facilities_df, ['province', 'state', 'territory'])
|
| 205 |
-
if province_col:
|
| 206 |
-
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 207 |
-
ab_facilities = facilities_df[alberta_mask].copy()
|
| 208 |
-
else:
|
| 209 |
-
ab_facilities = facilities_df.copy()
|
| 210 |
-
|
| 211 |
-
# Find facility type column
|
| 212 |
-
type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
|
| 213 |
-
if not type_col:
|
| 214 |
-
return {"error": "Facility type column not found"}
|
| 215 |
-
|
| 216 |
-
# Facility type frequency
|
| 217 |
-
type_counts = ab_facilities[type_col].value_counts().to_dict()
|
| 218 |
-
|
| 219 |
-
# Find city column
|
| 220 |
-
city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
|
| 221 |
-
if city_col:
|
| 222 |
-
city_counts = ab_facilities[city_col].value_counts().head(5)
|
| 223 |
-
top_cities = city_counts.index.tolist()
|
| 224 |
-
|
| 225 |
-
# Breakdown by facility type for top cities
|
| 226 |
-
city_breakdown = {}
|
| 227 |
-
for city in top_cities:
|
| 228 |
-
city_data = ab_facilities[ab_facilities[city_col] == city]
|
| 229 |
-
city_breakdown[city] = city_data[type_col].value_counts().to_dict()
|
| 230 |
-
else:
|
| 231 |
-
top_cities = []
|
| 232 |
-
city_breakdown = {}
|
| 233 |
-
|
| 234 |
-
return {
|
| 235 |
-
"total_facilities": len(ab_facilities),
|
| 236 |
-
"type_distribution": type_counts,
|
| 237 |
-
"top_cities": top_cities,
|
| 238 |
-
"city_breakdown": city_breakdown,
|
| 239 |
-
"columns_used": {
|
| 240 |
-
"facility_type": type_col,
|
| 241 |
-
"city": city_col,
|
| 242 |
-
"province": province_col
|
| 243 |
-
}
|
| 244 |
-
}
|
| 245 |
-
except Exception as e:
|
| 246 |
-
log_event("facility_analysis_error", None, {"error": str(e)})
|
| 247 |
-
return {"error": str(e)}
|
| 248 |
-
|
| 249 |
-
def analyze_bed_capacity(beds_df):
|
| 250 |
-
"""Analyze bed capacity dynamically."""
|
| 251 |
-
try:
|
| 252 |
-
# Validate input
|
| 253 |
-
if beds_df is None or beds_df.empty:
|
| 254 |
-
return {"error": "No bed data provided"}
|
| 255 |
-
|
| 256 |
-
# Find required columns
|
| 257 |
-
current_col = find_column(beds_df, ['current', '2023', '2024', 'beds_current', 'staffed_beds', 'capacity'])
|
| 258 |
-
prev_col = find_column(beds_df, ['prev', 'previous', '2022', 'beds_prev', 'previous_beds'])
|
| 259 |
-
|
| 260 |
-
if not current_col or not prev_col:
|
| 261 |
-
return {"error": f"Missing required columns. Found current: {current_col}, prev: {prev_col}"}
|
| 262 |
-
|
| 263 |
-
# Ensure derived columns exist
|
| 264 |
-
if 'bed_change' not in beds_df.columns:
|
| 265 |
-
beds_df['bed_change'] = beds_df[current_col] - beds_df[prev_col]
|
| 266 |
-
|
| 267 |
-
if 'percent_change' not in beds_df.columns:
|
| 268 |
-
beds_df['percent_change'] = beds_df.apply(
|
| 269 |
-
lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
|
| 270 |
-
axis=1
|
| 271 |
-
)
|
| 272 |
-
|
| 273 |
-
# Filter to Alberta if province column exists
|
| 274 |
-
province_col = find_column(beds_df, ['province', 'state', 'territory'])
|
| 275 |
-
if province_col:
|
| 276 |
-
alberta_mask = beds_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 277 |
-
ab_beds = beds_df[alberta_mask].copy()
|
| 278 |
-
else:
|
| 279 |
-
ab_beds = beds_df.copy()
|
| 280 |
-
|
| 281 |
-
# Calculate zone-level summaries if zone column exists
|
| 282 |
-
zone_col = find_column(beds_df, ['zone', 'region', 'area', 'district'])
|
| 283 |
-
if zone_col:
|
| 284 |
-
zone_summary = ab_beds.groupby(zone_col).agg({
|
| 285 |
-
current_col: 'sum',
|
| 286 |
-
prev_col: 'sum',
|
| 287 |
-
'bed_change': 'sum'
|
| 288 |
-
}).reset_index()
|
| 289 |
-
|
| 290 |
-
zone_summary['percent_change'] = zone_summary.apply(
|
| 291 |
-
lambda row: (row['bed_change'] / row[prev_col] * 100) if row[prev_col] != 0 else 0,
|
| 292 |
-
axis=1
|
| 293 |
-
)
|
| 294 |
-
|
| 295 |
-
# Find zones with largest changes
|
| 296 |
-
if len(zone_summary) > 0:
|
| 297 |
-
max_abs_decrease_idx = zone_summary['bed_change'].idxmin()
|
| 298 |
-
max_pct_decrease_idx = zone_summary['percent_change'].idxmin()
|
| 299 |
-
|
| 300 |
-
max_abs_decrease = zone_summary.loc[max_abs_decrease_idx]
|
| 301 |
-
max_pct_decrease = zone_summary.loc[max_pct_decrease_idx]
|
| 302 |
-
else:
|
| 303 |
-
max_abs_decrease = {}
|
| 304 |
-
max_pct_decrease = {}
|
| 305 |
-
|
| 306 |
-
# Identify facilities with largest declines
|
| 307 |
-
facilities_decline = ab_beds.sort_values('bed_change').head(5)
|
| 308 |
-
else:
|
| 309 |
-
zone_summary = pd.DataFrame()
|
| 310 |
-
max_abs_decrease = {}
|
| 311 |
-
max_pct_decrease = {}
|
| 312 |
-
facilities_decline = pd.DataFrame()
|
| 313 |
-
|
| 314 |
-
return {
|
| 315 |
-
"zone_summary": zone_summary.to_dict('records') if not zone_summary.empty else [],
|
| 316 |
-
"max_absolute_decrease": max_abs_decrease.to_dict() if isinstance(max_abs_decrease, pd.Series) else max_abs_decrease,
|
| 317 |
-
"max_percentage_decrease": max_pct_decrease.to_dict() if isinstance(max_pct_decrease, pd.Series) else max_pct_decrease,
|
| 318 |
-
"facilities_with_largest_declines": facilities_decline.to_dict('records') if not facilities_decline.empty else [],
|
| 319 |
-
"columns_used": {
|
| 320 |
-
"beds_current": current_col,
|
| 321 |
-
"beds_prev": prev_col,
|
| 322 |
-
"zone": zone_col,
|
| 323 |
-
"province": province_col
|
| 324 |
-
}
|
| 325 |
-
}
|
| 326 |
-
except Exception as e:
|
| 327 |
-
log_event("bed_analysis_error", None, {"error": str(e)})
|
| 328 |
-
return {"error": str(e)}
|
| 329 |
-
|
| 330 |
-
def assess_long_term_capacity(facilities_df, beds_df, zone_name):
|
| 331 |
-
"""Assess long-term care capacity dynamically."""
|
| 332 |
-
try:
|
| 333 |
-
# Validate inputs
|
| 334 |
-
if facilities_df is None or facilities_df.empty:
|
| 335 |
-
return {"error": "No facility data provided"}
|
| 336 |
-
|
| 337 |
-
# Find relevant columns
|
| 338 |
-
zone_col = find_column(facilities_df, ['zone', 'region', 'area', 'district'])
|
| 339 |
-
city_col = find_column(facilities_df, ['city', 'municipality', 'town'])
|
| 340 |
-
type_col = find_column(facilities_df, ['type', 'category', 'class', 'facility_type', 'odhf_facility_type'])
|
| 341 |
-
|
| 342 |
-
if not type_col:
|
| 343 |
-
return {"error": "Facility type column not found"}
|
| 344 |
-
|
| 345 |
-
# Get facilities in the specified zone
|
| 346 |
-
if zone_col:
|
| 347 |
-
zone_facilities = facilities_df[facilities_df[zone_col] == zone_name].copy()
|
| 348 |
-
else:
|
| 349 |
-
# If zone column not available, use province
|
| 350 |
-
province_col = find_column(facilities_df, ['province', 'state', 'territory'])
|
| 351 |
-
if province_col:
|
| 352 |
-
alberta_mask = facilities_df[province_col].str.lower().isin(['alberta', 'ab'])
|
| 353 |
-
zone_facilities = facilities_df[alberta_mask].copy()
|
| 354 |
-
else:
|
| 355 |
-
zone_facilities = facilities_df.copy()
|
| 356 |
-
|
| 357 |
-
# Find major city in zone
|
| 358 |
-
if city_col:
|
| 359 |
-
city_counts = zone_facilities[city_col].value_counts()
|
| 360 |
-
if len(city_counts) > 0:
|
| 361 |
-
major_city = city_counts.index[0]
|
| 362 |
-
city_facilities = zone_facilities[zone_facilities[city_col] == major_city]
|
| 363 |
-
|
| 364 |
-
# Count facility types
|
| 365 |
-
facility_counts = city_facilities[type_col].value_counts().to_dict()
|
| 366 |
-
|
| 367 |
-
# Calculate ratio of nursing/residential to hospitals
|
| 368 |
-
hospitals = sum(count for key, count in facility_counts.items() if 'hospital' in key.lower())
|
| 369 |
-
nursing = sum(count for key, count in facility_counts.items() if any(word in key.lower() for word in ['nursing', 'residential', 'care']))
|
| 370 |
-
ratio = nursing / hospitals if hospitals > 0 else 0
|
| 371 |
-
|
| 372 |
-
# Assess capacity
|
| 373 |
-
capacity_assessment = "sufficient" if ratio >= 1.5 else "insufficient"
|
| 374 |
-
|
| 375 |
-
return {
|
| 376 |
-
"zone": zone_name,
|
| 377 |
-
"major_city": major_city,
|
| 378 |
-
"facility_counts": facility_counts,
|
| 379 |
-
"nursing_to_hospital_ratio": ratio,
|
| 380 |
-
"capacity_assessment": capacity_assessment,
|
| 381 |
-
"columns_used": {
|
| 382 |
-
"zone": zone_col,
|
| 383 |
-
"city": city_col,
|
| 384 |
-
"facility_type": type_col
|
| 385 |
-
}
|
| 386 |
-
}
|
| 387 |
-
|
| 388 |
-
return {"error": "Could not determine major city or facility counts"}
|
| 389 |
-
except Exception as e:
|
| 390 |
-
log_event("ltc_assessment_error", None, {"error": str(e)})
|
| 391 |
-
return {"error": str(e)}
|
| 392 |
-
|
| 393 |
-
def generate_operational_recommendations(analysis_results):
|
| 394 |
-
"""Generate data-driven operational recommendations."""
|
| 395 |
-
recommendations = []
|
| 396 |
-
|
| 397 |
-
# Recommendation 1: Address bed capacity issues
|
| 398 |
-
if 'bed_capacity' in analysis_results:
|
| 399 |
-
bed_data = analysis_results['bed_capacity']
|
| 400 |
-
if 'max_percentage_decrease' in bed_data and isinstance(bed_data['max_percentage_decrease'], dict):
|
| 401 |
-
zone_col = bed_data.get('columns_used', {}).get('zone')
|
| 402 |
-
zone = bed_data['max_percentage_decrease'].get(zone_col, '') if zone_col else ''
|
| 403 |
-
decrease = bed_data['max_percentage_decrease'].get('percent_change', 0)
|
| 404 |
-
if zone and decrease:
|
| 405 |
-
recommendations.append({
|
| 406 |
-
"title": f"Restore staffed beds in {zone} Zone",
|
| 407 |
-
"description": f"Priority should be given to reopening closed units and hiring staff to address the {decrease:.1f}% decrease in bed capacity.",
|
| 408 |
-
"data_source": "Bed capacity analysis"
|
| 409 |
-
})
|
| 410 |
-
|
| 411 |
-
# Recommendation 2: Expand long-term care capacity
|
| 412 |
-
if 'long_term_care' in analysis_results:
|
| 413 |
-
ltc_data = analysis_results['long_term_care']
|
| 414 |
-
if ltc_data.get('capacity_assessment') == 'insufficient':
|
| 415 |
-
city = ltc_data.get('major_city', '')
|
| 416 |
-
if city:
|
| 417 |
-
recommendations.append({
|
| 418 |
-
"title": f"Expand long-term care capacity in {city}",
|
| 419 |
-
"description": f"Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.",
|
| 420 |
-
"data_source": "Long-term care capacity assessment"
|
| 421 |
-
})
|
| 422 |
-
|
| 423 |
-
# Recommendation 3: Implement surge plans
|
| 424 |
-
if 'bed_capacity' in analysis_results:
|
| 425 |
-
recommendations.append({
|
| 426 |
-
"title": "Implement surge capacity plans",
|
| 427 |
-
"description": "Develop modular units and activate staffing pools to handle unpredictable spikes in demand.",
|
| 428 |
-
"data_source": "Bed capacity trends"
|
| 429 |
-
})
|
| 430 |
-
|
| 431 |
-
return recommendations
|
| 432 |
-
|
| 433 |
-
def generate_ai_integration_discussion(analysis_results):
|
| 434 |
-
"""Generate discussion on future AI integration for healthcare operations."""
|
| 435 |
-
return {
|
| 436 |
-
"title": "Future Integration for Augmented Decision-Making",
|
| 437 |
-
"description": "Combining facility information with operational data like emergency department wait times and disease surveillance can enable AI-driven resource optimization.",
|
| 438 |
-
"example": "A model could ingest current ED wait times, hospital occupancy, and community case counts to forecast bed demand by zone and recommend redirecting ambulances to facilities with spare capacity.",
|
| 439 |
-
"metrics": ["Hospital occupancy rates", "ED wait times", "Disease surveillance data"]
|
| 440 |
-
}
|
| 441 |
-
|
| 442 |
-
def format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration):
|
| 443 |
-
"""Format the healthcare analysis response with tables and sections."""
|
| 444 |
-
response = "# Structured Analysis: Healthcare Scenario\n\n"
|
| 445 |
-
|
| 446 |
-
# Extract tasks from scenario to ensure we address all requirements
|
| 447 |
-
tasks = extract_scenario_tasks(scenario_text)
|
| 448 |
-
|
| 449 |
-
# Data Preparation Section
|
| 450 |
-
if 'facility_distribution' in results:
|
| 451 |
-
fd = results['facility_distribution']
|
| 452 |
-
if 'error' in fd:
|
| 453 |
-
response += "## 1. Data Preparation\n\n"
|
| 454 |
-
response += f"Error in facility distribution analysis: {fd['error']}\n\n"
|
| 455 |
-
else:
|
| 456 |
-
response += "## 1. Data Preparation\n\n"
|
| 457 |
-
response += f"Total healthcare facilities: {fd.get('total_facilities', 'N/A')}\n\n"
|
| 458 |
-
|
| 459 |
-
if 'type_distribution' in fd and isinstance(fd['type_distribution'], dict):
|
| 460 |
-
response += "### Facility Type Distribution\n\n"
|
| 461 |
-
for ftype, count in fd['type_distribution'].items():
|
| 462 |
-
response += f"- {ftype}: {count}\n"
|
| 463 |
-
response += "\n"
|
| 464 |
-
|
| 465 |
-
if 'city_breakdown' in fd and isinstance(fd['city_breakdown'], dict):
|
| 466 |
-
response += "### Top Cities by Facility Count\n\n"
|
| 467 |
-
response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n"
|
| 468 |
-
response += "|------|-----------|-------------------|------------|-------|\n"
|
| 469 |
-
|
| 470 |
-
for city, breakdown in fd['city_breakdown'].items():
|
| 471 |
-
hospitals = breakdown.get('Hospitals', 0)
|
| 472 |
-
nursing = breakdown.get('Nursing and residential care facilities', 0)
|
| 473 |
-
ambulatory = breakdown.get('Ambulatory health care services', 0)
|
| 474 |
-
total = hospitals + nursing + ambulatory
|
| 475 |
-
response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n"
|
| 476 |
-
response += "\n"
|
| 477 |
-
|
| 478 |
-
# Bed Capacity Analysis Section
|
| 479 |
-
if 'bed_capacity' in results:
|
| 480 |
-
bc = results['bed_capacity']
|
| 481 |
-
if 'error' in bc:
|
| 482 |
-
response += "## 2. Bed Capacity Analysis\n\n"
|
| 483 |
-
response += f"Error in bed capacity analysis: {bc['error']}\n\n"
|
| 484 |
-
else:
|
| 485 |
-
response += "## 2. Bed Capacity Analysis\n\n"
|
| 486 |
-
|
| 487 |
-
if 'zone_summary' in bc and bc['zone_summary']:
|
| 488 |
-
response += "### Bed Capacity by Zone\n\n"
|
| 489 |
-
response += "| Zone | Beds (Current) | Beds (Previous) | Absolute Change | Percent Change |\n"
|
| 490 |
-
response += "|------|---------------|-----------------|-----------------|----------------|\n"
|
| 491 |
-
|
| 492 |
-
zone_col = bc.get('columns_used', {}).get('zone')
|
| 493 |
-
current_col = bc.get('columns_used', {}).get('beds_current')
|
| 494 |
-
prev_col = bc.get('columns_used', {}).get('beds_prev')
|
| 495 |
-
|
| 496 |
-
for zone_data in bc['zone_summary']:
|
| 497 |
-
zone = zone_data.get(zone_col, 'N/A') if zone_col else 'N/A'
|
| 498 |
-
current = zone_data.get(current_col, 'N/A') if current_col else 'N/A'
|
| 499 |
-
prev = zone_data.get(prev_col, 'N/A') if prev_col else 'N/A'
|
| 500 |
-
change = zone_data.get('bed_change', 'N/A')
|
| 501 |
-
pct = zone_data.get('percent_change', 'N/A')
|
| 502 |
-
response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n"
|
| 503 |
-
response += "\n"
|
| 504 |
-
|
| 505 |
-
if 'max_absolute_decrease' in bc and isinstance(bc['max_absolute_decrease'], dict) and \
|
| 506 |
-
'max_percentage_decrease' in bc and isinstance(bc['max_percentage_decrease'], dict):
|
| 507 |
-
zone_col = bc.get('columns_used', {}).get('zone')
|
| 508 |
-
abs_dec = bc['max_absolute_decrease']
|
| 509 |
-
pct_dec = bc['max_percentage_decrease']
|
| 510 |
-
response += f"**Zone with largest absolute decrease**: {abs_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n"
|
| 511 |
-
response += f"**Zone with largest percentage decrease**: {pct_dec.get(zone_col, 'N/A') if zone_col else 'N/A'} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n"
|
| 512 |
-
|
| 513 |
-
if 'facilities_with_largest_declines' in bc and bc['facilities_with_largest_declines']:
|
| 514 |
-
response += "### Facilities with Largest Bed Declines\n\n"
|
| 515 |
-
response += "| Facility | Zone | Teaching Status | Beds Lost |\n"
|
| 516 |
-
response += "|----------|------|----------------|-----------|\n"
|
| 517 |
-
|
| 518 |
-
zone_col = bc.get('columns_used', {}).get('zone')
|
| 519 |
-
for facility in bc['facilities_with_largest_declines']:
|
| 520 |
-
name = facility.get('facility_name', 'N/A')
|
| 521 |
-
zone = facility.get(zone_col, 'N/A') if zone_col else 'N/A'
|
| 522 |
-
teaching = facility.get('teaching_status', 'N/A')
|
| 523 |
-
change = facility.get('bed_change', 'N/A')
|
| 524 |
-
response += f"| {name} | {zone} | {teaching} | {change} |\n"
|
| 525 |
-
response += "\n"
|
| 526 |
-
|
| 527 |
-
# Long-term Care Section
|
| 528 |
-
if 'long_term_care' in results:
|
| 529 |
-
ltc = results['long_term_care']
|
| 530 |
-
if 'error' in ltc:
|
| 531 |
-
response += "## 3. Long-Term Care Capacity Assessment\n\n"
|
| 532 |
-
response += f"Error in long-term care assessment: {ltc['error']}\n\n"
|
| 533 |
-
else:
|
| 534 |
-
response += "## 3. Long-Term Care Capacity Assessment\n\n"
|
| 535 |
-
|
| 536 |
-
zone = ltc.get('zone', 'N/A')
|
| 537 |
-
city = ltc.get('major_city', 'N/A')
|
| 538 |
-
ratio = ltc.get('nursing_to_hospital_ratio', 0)
|
| 539 |
-
assessment = ltc.get('capacity_assessment', 'N/A')
|
| 540 |
-
|
| 541 |
-
response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n"
|
| 542 |
-
response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n"
|
| 543 |
-
|
| 544 |
-
if 'facility_counts' in ltc and isinstance(ltc['facility_counts'], dict):
|
| 545 |
-
response += "### Facility Counts\n\n"
|
| 546 |
-
for ftype, count in ltc['facility_counts'].items():
|
| 547 |
-
response += f"- {ftype}: {count}\n"
|
| 548 |
-
response += "\n"
|
| 549 |
-
|
| 550 |
-
# Operational Recommendations Section
|
| 551 |
-
response += "## 4. Operational Recommendations\n\n"
|
| 552 |
-
if recommendations:
|
| 553 |
-
for rec in recommendations:
|
| 554 |
-
response += f"### {rec['title']}\n\n"
|
| 555 |
-
response += f"{rec['description']}\n\n"
|
| 556 |
-
response += f"*Data source: {rec['data_source']}*\n\n"
|
| 557 |
-
else:
|
| 558 |
-
response += "No specific recommendations could be generated due to data limitations.\n\n"
|
| 559 |
-
|
| 560 |
-
# AI Integration Section
|
| 561 |
-
response += "## 5. Future Integration for Augmented AI\n\n"
|
| 562 |
-
response += f"### {ai_integration['title']}\n\n"
|
| 563 |
-
response += f"{ai_integration['description']}\n\n"
|
| 564 |
-
response += f"**Example**: {ai_integration['example']}\n\n"
|
| 565 |
-
response += "**Key metrics to incorporate**:\n"
|
| 566 |
-
for metric in ai_integration['metrics']:
|
| 567 |
-
response += f"- {metric}\n"
|
| 568 |
-
response += "\n"
|
| 569 |
-
|
| 570 |
-
# Provenance Section
|
| 571 |
-
response += "## Provenance\n\n"
|
| 572 |
-
response += "This analysis is based on:\n"
|
| 573 |
-
response += "- Scenario description provided by the user\n"
|
| 574 |
-
response += "- Uploaded data files\n"
|
| 575 |
-
response += "- Calculations performed on the provided data\n"
|
| 576 |
-
|
| 577 |
-
return response
|
| 578 |
-
|
| 579 |
def handle_healthcare_scenario(scenario_text, data_registry, history):
|
| 580 |
-
"""Handle healthcare scenarios
|
| 581 |
try:
|
| 582 |
-
|
|
|
|
| 583 |
|
| 584 |
-
#
|
| 585 |
-
|
| 586 |
-
print(f"Extracted tasks: {tasks}")
|
| 587 |
|
| 588 |
-
#
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
# Use the first file of each type (can be enhanced to use multiple)
|
| 593 |
-
facilities_df = None
|
| 594 |
-
if facility_files:
|
| 595 |
-
facilities_df = data_registry.get(facility_files[0])
|
| 596 |
-
|
| 597 |
-
beds_df = None
|
| 598 |
-
if bed_files:
|
| 599 |
-
beds_df = data_registry.get(bed_files[0])
|
| 600 |
-
|
| 601 |
-
# Log what we found
|
| 602 |
-
log_event("data_files_found", None, {
|
| 603 |
-
"facilities": facilities_df is not None,
|
| 604 |
-
"beds": beds_df is not None,
|
| 605 |
-
"facility_files": facility_files,
|
| 606 |
-
"bed_files": bed_files
|
| 607 |
-
})
|
| 608 |
-
|
| 609 |
-
# Task 1: Data preparation (facility distribution)
|
| 610 |
-
if facilities_df is not None:
|
| 611 |
-
results['facility_distribution'] = analyze_facility_distribution(facilities_df)
|
| 612 |
-
|
| 613 |
-
# Task 2: Bed capacity analysis
|
| 614 |
-
if beds_df is not None:
|
| 615 |
-
results['bed_capacity'] = analyze_bed_capacity(beds_df)
|
| 616 |
-
|
| 617 |
-
# Task 3: Long-term care capacity assessment
|
| 618 |
-
if 'bed_capacity' in results and 'max_percentage_decrease' in results['bed_capacity']:
|
| 619 |
-
zone_col = results['bed_capacity'].get('columns_used', {}).get('zone')
|
| 620 |
-
if zone_col:
|
| 621 |
-
worst_zone = results['bed_capacity']['max_percentage_decrease'].get(zone_col, '')
|
| 622 |
-
if worst_zone and facilities_df is not None:
|
| 623 |
-
results['long_term_care'] = assess_long_term_capacity(
|
| 624 |
-
facilities_df,
|
| 625 |
-
beds_df,
|
| 626 |
-
worst_zone
|
| 627 |
-
)
|
| 628 |
-
|
| 629 |
-
# Generate operational recommendations (Task 4.1)
|
| 630 |
-
recommendations = generate_operational_recommendations(results)
|
| 631 |
-
|
| 632 |
-
# Generate AI integration discussion (Task 4.2)
|
| 633 |
-
ai_integration = generate_ai_integration_discussion(results)
|
| 634 |
-
|
| 635 |
-
# Format response ensuring all tasks are addressed
|
| 636 |
-
response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration)
|
| 637 |
|
| 638 |
return response
|
| 639 |
except Exception as e:
|
|
@@ -727,8 +287,8 @@ def cohere_chat(message, history):
|
|
| 727 |
except Exception:
|
| 728 |
return None
|
| 729 |
|
| 730 |
-
def build_inputs(tokenizer, message, history):
|
| 731 |
-
msgs = [{"role": "system", "content":
|
| 732 |
for u, a in _iter_user_assistant(history):
|
| 733 |
if u: msgs.append({"role": "user", "content": u})
|
| 734 |
if a: msgs.append({"role": "assistant", "content": a})
|
|
@@ -792,8 +352,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
|
|
| 792 |
response = handle_healthcare_scenario(safe_in, data_registry, history)
|
| 793 |
return history + [(user_msg, response)], False
|
| 794 |
else:
|
| 795 |
-
# General conversation mode
|
| 796 |
-
# Try Cohere first if available
|
| 797 |
if USE_HOSTED_COHERE:
|
| 798 |
out = cohere_chat(safe_in, history)
|
| 799 |
if out:
|
|
@@ -808,10 +367,12 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
|
|
| 808 |
})
|
| 809 |
return history + [(user_msg, safe_out)], False
|
| 810 |
|
| 811 |
-
#
|
| 812 |
try:
|
| 813 |
model, tokenizer = load_local_model()
|
| 814 |
-
|
|
|
|
|
|
|
| 815 |
out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
|
| 816 |
|
| 817 |
if isinstance(out, str):
|
|
|
|
| 10 |
import regex as re2
|
| 11 |
|
| 12 |
# Import necessary modules
|
| 13 |
+
from settings import (
|
| 14 |
+
SNAPSHOT_PATH, PERSIST_CONTENT, HEALTHCARE_SETTINGS, MODEL_SETTINGS,
|
| 15 |
+
HEALTHCARE_SYSTEM_PROMPT, GENERAL_CONVERSATION_PROMPT
|
| 16 |
+
)
|
| 17 |
from audit_log import log_event, hash_summary
|
| 18 |
from privacy import redact_text, safety_filter, refusal_reply
|
| 19 |
from data_registry import DataRegistry
|
| 20 |
from upload_ingest import extract_text_from_files
|
| 21 |
+
from healthcare_analysis import HealthcareAnalyzer
|
| 22 |
+
from response_formatter import ResponseFormatter
|
| 23 |
|
| 24 |
# ---------- Writable caches (HF Spaces-safe) ----------
|
| 25 |
HOME = pathlib.Path.home()
|
|
|
|
| 62 |
USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
|
| 63 |
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", MODEL_SETTINGS.get("max_new_tokens", 2048)))
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
# ---------- Helper Functions ----------
|
| 66 |
def find_column(df, patterns):
|
| 67 |
"""Find the first column in df that matches any of the patterns."""
|
|
|
|
| 182 |
"error": str(e)
|
| 183 |
})
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
def handle_healthcare_scenario(scenario_text, data_registry, history):
|
| 186 |
+
"""Handle healthcare scenarios with enhanced analysis"""
|
| 187 |
try:
|
| 188 |
+
# Initialize analyzer
|
| 189 |
+
analyzer = HealthcareAnalyzer(data_registry)
|
| 190 |
|
| 191 |
+
# Perform comprehensive analysis
|
| 192 |
+
results = analyzer.comprehensive_analysis(scenario_text)
|
|
|
|
| 193 |
|
| 194 |
+
# Format response
|
| 195 |
+
formatter = ResponseFormatter()
|
| 196 |
+
response = formatter.format_healthcare_response(scenario_text, results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
return response
|
| 199 |
except Exception as e:
|
|
|
|
| 287 |
except Exception:
|
| 288 |
return None
|
| 289 |
|
| 290 |
+
def build_inputs(tokenizer, message, history, system_prompt):
|
| 291 |
+
msgs = [{"role": "system", "content": system_prompt}]
|
| 292 |
for u, a in _iter_user_assistant(history):
|
| 293 |
if u: msgs.append({"role": "user", "content": u})
|
| 294 |
if a: msgs.append({"role": "assistant", "content": a})
|
|
|
|
| 352 |
response = handle_healthcare_scenario(safe_in, data_registry, history)
|
| 353 |
return history + [(user_msg, response)], False
|
| 354 |
else:
|
| 355 |
+
# General conversation mode with enhanced handling
|
|
|
|
| 356 |
if USE_HOSTED_COHERE:
|
| 357 |
out = cohere_chat(safe_in, history)
|
| 358 |
if out:
|
|
|
|
| 367 |
})
|
| 368 |
return history + [(user_msg, safe_out)], False
|
| 369 |
|
| 370 |
+
# Enhanced local model generation
|
| 371 |
try:
|
| 372 |
model, tokenizer = load_local_model()
|
| 373 |
+
|
| 374 |
+
# Use general conversation prompt
|
| 375 |
+
inputs = build_inputs(tokenizer, safe_in, history, GENERAL_CONVERSATION_PROMPT)
|
| 376 |
out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
|
| 377 |
|
| 378 |
if isinstance(out, str):
|