# app.py - Enhanced Healthcare Scenario Analysis System import os, re, json, traceback, pathlib from functools import lru_cache from typing import List, Dict, Any, Tuple, Optional import pandas as pd import numpy as np import gradio as gr import torch import regex as re2 # Import necessary modules (assuming they exist in your environment) from settings import SNAPSHOT_PATH, PERSIST_CONTENT from audit_log import log_event, hash_summary from privacy import redact_text, safety_filter, refusal_reply # ---------- Writable caches (HF Spaces-safe) ---------- HOME = pathlib.Path.home() HF_HOME = str(HOME / ".cache" / "huggingface") HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub") HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers") ST_HOME = str(HOME / ".cache" / "sentence-transformers") GRADIO_TMP = str(HOME / "app" / "gradio") GRADIO_CACHE = GRADIO_TMP os.environ.setdefault("HF_HOME", HF_HOME) os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE) os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS) os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME) os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP) os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE) os.environ.setdefault("HF_HUB_ENABLE_XET", "0") os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") for p in [HF_HOME, HF_HUB_CACHE, HF_TRANSFORMERS, ST_HOME, GRADIO_TMP, GRADIO_CACHE]: try: os.makedirs(p, exist_ok=True) except Exception: pass # Optional Cohere try: import cohere _HAS_COHERE = True except Exception: _HAS_COHERE = False from transformers import AutoTokenizer, AutoModelForCausalLM from huggingface_hub import login # ---------- Healthcare-specific constants ---------- HEALTHCARE_KEYWORDS = [ "hospital", "patient", "bed", "care", "health", "medical", "clinical", "facility", "nursing", "residential", "ambulatory", "healthcare", "occupancy", "capacity", "staff", "zone", "province", "alberta", "cihi", "odhf", "respiratory", "virus", "flu", "surge", "acute", "long-term", "ltc" ] HEALTHCARE_FACILITY_TYPES = { "Hospitals": ["hospital", "medical center", "health centre"], "Nursing and residential care facilities": ["nursing", "residential", "care facility", "long-term care"], "Ambulatory health care services": ["ambulatory", "clinic", "surgery center", "outpatient"] } # ---------- Config ---------- MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct") HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN") COHERE_API_KEY = os.getenv("COHERE_API_KEY") USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE) MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048")) # ---------- Generic System Prompt ---------- SYSTEM_MASTER = """ SYSTEM ROLE You are an AI analytical system that provides data-driven insights for any scenario. Absolute rules: - Use ONLY information provided in this conversation (scenario text + uploaded files + user answers). - Never invent data. If something required is missing after clarifications, write the literal token: INSUFFICIENT_DATA. - Provide clear analysis with calculations, evidence, and reasoning. - Maintain privacy safeguards (aggregate data; suppress small cohorts <10). - Adapt your analysis approach to the specific scenario and data provided. Formatting rules for structured analysis: - Start with the header: "Structured Analysis" - Organize analysis into logical sections based on the scenario requirements - End with concrete recommendations and a brief "Provenance" mapping outputs to scenario text, uploaded files, and answers. """.strip() # ---------- Data Registry Class ---------- class DataRegistry: def __init__(self): self.data = {} self.file_metadata = {} def add_path(self, path): try: file_name = os.path.basename(path) if file_name.endswith('.csv'): df = pd.read_csv(path) self.data[file_name] = df self.file_metadata[file_name] = { 'type': 'csv', 'columns': list(df.columns), 'shape': df.shape, 'sample': df.head(3).to_dict('records') } return True except Exception as e: print(f"Error adding {path}: {e}") return False def names(self): return list(self.data.keys()) def get(self, name): return self.data.get(name) def summarize_for_prompt(self): if not self.data: return "No data files registered." summary = [] for name, meta in self.file_metadata.items(): summary.append(f"File: {name}") summary.append(f"Type: {meta['type']}") summary.append(f"Columns: {', '.join(meta['columns'])}") summary.append(f"Shape: {meta['shape']}") summary.append("") return "\n".join(summary) def clear(self): self.data.clear() self.file_metadata.clear() # ---------- Session RAG Class (Simplified) ---------- class SessionRAG: def __init__(self): self.docs = [] self.artifacts = [] self.csv_columns = [] def add_docs(self, chunks): self.docs.extend(chunks) def register_artifacts(self, artifacts): self.artifacts.extend(artifacts) def get_latest_csv_columns(self): return self.csv_columns def retrieve(self, query, k=5): # Simple retrieval - return top k documents return self.docs[:k] if self.docs else [] def clear(self): self.docs.clear() self.artifacts.clear() self.csv_columns.clear() # ---------- Healthcare-specific functions ---------- def is_healthcare_scenario(text: str, uploaded_files_paths) -> bool: """Detect if this is a healthcare scenario with specific indicators.""" t = (text or "").lower() # Check for healthcare keywords has_healthcare_keywords = any(keyword in t for keyword in HEALTHCARE_KEYWORDS) # Check for healthcare facility types has_facility_types = any( any(ftype in t for ftype in types) for types in HEALTHCARE_FACILITY_TYPES.values() ) # Check for healthcare-specific tasks has_healthcare_tasks = any( phrase in t for phrase in [ "bed capacity", "occupancy rates", "facility distribution", "long-term care", "health operations", "resource allocation" ] ) # Check for healthcare data files has_healthcare_files = any( "health" in path.lower() or "facility" in path.lower() or "bed" in path.lower() for path in uploaded_files_paths ) # Check for structured scenario format has_scenario_structure = any( section in t for section in ["background", "situation", "tasks"] ) return (has_healthcare_keywords or has_facility_types or has_healthcare_tasks) and \ (has_healthcare_files or has_scenario_structure) def process_healthcare_data(uploaded_files_paths, data_registry): """Process healthcare data files with robust error handling.""" for file_path in uploaded_files_paths: try: file_name = os.path.basename(file_path).lower() if file_name.endswith('.csv'): df = pd.read_csv(file_path) # Standardize column names df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns] # Handle healthcare-specific data structures if 'facility_name' in df.columns: if 'facility_type' not in df.columns and 'odhf_facility_type' in df.columns: df['facility_type'] = df['odhf_facility_type'] if 'beds_current' in df.columns and 'beds_prev' in df.columns: df['bed_change'] = df['beds_current'] - df['beds_prev'] df['percent_change'] = (df['bed_change'] / df['beds_prev']) * 100 data_registry.add_path(file_path) except Exception as e: print(f"Error processing {file_path}: {e}") log_event("data_processing_error", None, { "file": file_path, "error": str(e) }) def analyze_facility_distribution(facilities_df): """Analyze healthcare facility distribution by type and location.""" try: # Filter to Alberta if province column exists if 'province' in facilities_df.columns: ab_facilities = facilities_df[facilities_df['province'] == 'ab'] else: ab_facilities = facilities_df # Facility type frequency type_counts = ab_facilities['facility_type'].value_counts().to_dict() # Top cities by facility count if 'city' in ab_facilities.columns: city_counts = ab_facilities['city'].value_counts().head(5) top_cities = city_counts.index.tolist() # Breakdown by facility type for top cities city_breakdown = {} for city in top_cities: city_data = ab_facilities[ab_facilities['city'] == city] city_breakdown[city] = city_data['facility_type'].value_counts().to_dict() else: top_cities = [] city_breakdown = {} return { "total_facilities": len(ab_facilities), "type_distribution": type_counts, "top_cities": top_cities, "city_breakdown": city_breakdown } except Exception as e: log_event("facility_analysis_error", None, {"error": str(e)}) return {"error": str(e)} def analyze_bed_capacity(beds_df): """Analyze bed capacity by zone and identify trends.""" try: # Filter to Alberta if province column exists if 'province' in beds_df.columns: ab_beds = beds_df[beds_df['province'] == 'alberta'] else: ab_beds = beds_df # Calculate zone-level summaries if 'zone' in ab_beds.columns: zone_summary = ab_beds.groupby('zone').agg({ 'beds_current': 'sum', 'beds_prev': 'sum', 'bed_change': 'sum' }).reset_index() # Calculate percentage change zone_summary['percent_change'] = (zone_summary['bed_change'] / zone_summary['beds_prev']) * 100 # Find zones with largest changes max_abs_decrease = zone_summary.loc[zone_summary['bed_change'].idxmin()] max_pct_decrease = zone_summary.loc[zone_summary['percent_change'].idxmin()] # Identify facilities with largest declines facilities_decline = ab_beds.sort_values('bed_change').head(5) else: zone_summary = pd.DataFrame() max_abs_decrease = {} max_pct_decrease = {} facilities_decline = pd.DataFrame() return { "zone_summary": zone_summary.to_dict('records'), "max_absolute_decrease": max_abs_decrease.to_dict(), "max_percentage_decrease": max_pct_decrease.to_dict(), "facilities_with_largest_declines": facilities_decline.to_dict('records') } except Exception as e: log_event("bed_analysis_error", None, {"error": str(e)}) return {"error": str(e)} def assess_long_term_capacity(facilities_df, beds_df, zone_name): """Assess long-term care capacity in a specific zone.""" try: # Get facilities in the specified zone if 'zone' in facilities_df.columns: zone_facilities = facilities_df[facilities_df['zone'] == zone_name] else: # If zone column not available, use province zone_facilities = facilities_df[facilities_df['province'] == 'ab'] # Find major city in zone if 'city' in zone_facilities.columns: city_counts = zone_facilities['city'].value_counts() major_city = city_counts.index[0] if len(city_counts) > 0 else None if major_city: city_facilities = zone_facilities[zone_facilities['city'] == major_city] # Count facility types facility_counts = city_facilities['facility_type'].value_counts().to_dict() # Calculate ratio of nursing/residential to hospitals hospitals = facility_counts.get('Hospitals', 0) nursing = facility_counts.get('Nursing and residential care facilities', 0) ratio = nursing / hospitals if hospitals > 0 else 0 # Assess capacity capacity_assessment = "sufficient" if ratio >= 1.5 else "insufficient" return { "zone": zone_name, "major_city": major_city, "facility_counts": facility_counts, "nursing_to_hospital_ratio": ratio, "capacity_assessment": capacity_assessment } return {"error": "Could not determine major city or facility counts"} except Exception as e: log_event("ltc_assessment_error", None, {"error": str(e)}) return {"error": str(e)} def generate_operational_recommendations(analysis_results): """Generate data-driven operational recommendations.""" recommendations = [] # Recommendation 1: Address bed capacity issues if 'bed_capacity' in analysis_results: bed_data = analysis_results['bed_capacity'] if 'max_percentage_decrease' in bed_data: zone = bed_data['max_percentage_decrease'].get('zone', '') decrease = bed_data['max_percentage_decrease'].get('percent_change', 0) recommendations.append({ "title": f"Restore staffed beds in {zone} Zone", "description": f"Priority should be given to reopening closed units and hiring staff to address the {decrease:.1f}% decrease in bed capacity.", "data_source": "Bed capacity analysis" }) # Recommendation 2: Expand long-term care capacity if 'long_term_care' in analysis_results: ltc_data = analysis_results['long_term_care'] if ltc_data.get('capacity_assessment') == 'insufficient': city = ltc_data.get('major_city', '') recommendations.append({ "title": f"Expand long-term care capacity in {city}", "description": f"Invest in new long-term care beds or repurpose existing sites to expedite discharge of stabilized patients.", "data_source": "Long-term care capacity assessment" }) # Recommendation 3: Implement surge plans if 'bed_capacity' in analysis_results: recommendations.append({ "title": "Implement surge capacity plans", "description": "Develop modular units and activate staffing pools to handle unpredictable spikes in demand.", "data_source": "Bed capacity trends" }) return recommendations def generate_ai_integration_discussion(analysis_results): """Generate discussion on future AI integration for healthcare operations.""" return { "title": "Future Integration for Augmented Decision-Making", "description": "Combining facility information with operational data like emergency department wait times and disease surveillance can enable AI-driven resource optimization.", "example": "A model could ingest current ED wait times, hospital occupancy, and community case counts to forecast bed demand by zone and recommend redirecting ambulances to facilities with spare capacity.", "metrics": ["Hospital occupancy rates", "ED wait times", "Disease surveillance data"] } def format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration): """Format the healthcare analysis response with tables and sections.""" response = "# Structured Analysis: Healthcare Scenario\n\n" # Data Preparation Section if 'facility_distribution' in results: fd = results['facility_distribution'] response += "## 1. Data Preparation\n\n" response += f"Total healthcare facilities in Alberta: {fd.get('total_facilities', 'N/A')}\n\n" if 'type_distribution' in fd: response += "### Facility Type Distribution\n\n" for ftype, count in fd['type_distribution'].items(): response += f"- {ftype}: {count}\n" response += "\n" if 'city_breakdown' in fd: response += "### Top Cities by Facility Count\n\n" response += "| City | Hospitals | Nursing/Residential | Ambulatory | Total |\n" response += "|------|-----------|-------------------|------------|-------|\n" for city, breakdown in fd['city_breakdown'].items(): hospitals = breakdown.get('Hospitals', 0) nursing = breakdown.get('Nursing and residential care facilities', 0) ambulatory = breakdown.get('Ambulatory health care services', 0) total = hospitals + nursing + ambulatory response += f"| {city} | {hospitals} | {nursing} | {ambulatory} | {total} |\n" response += "\n" # Bed Capacity Analysis Section if 'bed_capacity' in results: bc = results['bed_capacity'] response += "## 2. Bed Capacity Analysis\n\n" if 'zone_summary' in bc: response += "### Bed Capacity by Zone\n\n" response += "| Zone | Beds (2023-24) | Beds (2022-23) | Absolute Change | Percent Change |\n" response += "|------|---------------|---------------|-----------------|----------------|\n" for zone_data in bc['zone_summary']: zone = zone_data.get('zone', 'N/A') current = zone_data.get('beds_current', 'N/A') prev = zone_data.get('beds_prev', 'N/A') change = zone_data.get('bed_change', 'N/A') pct = zone_data.get('percent_change', 'N/A') response += f"| {zone} | {current} | {prev} | {change} | {pct:.1f}% |\n" response += "\n" if 'max_absolute_decrease' in bc and 'max_percentage_decrease' in bc: abs_dec = bc['max_absolute_decrease'] pct_dec = bc['max_percentage_decrease'] response += f"**Zone with largest absolute decrease**: {abs_dec.get('zone', 'N/A')} ({abs_dec.get('bed_change', 'N/A')} beds)\n\n" response += f"**Zone with largest percentage decrease**: {pct_dec.get('zone', 'N/A')} ({pct_dec.get('percent_change', 'N/A'):.1f}%)\n\n" if 'facilities_with_largest_declines' in bc: response += "### Facilities with Largest Bed Declines\n\n" response += "| Facility | Zone | Teaching Status | Beds Lost |\n" response += "|----------|------|----------------|-----------|\n" for facility in bc['facilities_with_largest_declines']: name = facility.get('facility_name', 'N/A') zone = facility.get('zone', 'N/A') teaching = facility.get('teaching_status', 'N/A') change = facility.get('bed_change', 'N/A') response += f"| {name} | {zone} | {teaching} | {change} |\n" response += "\n" # Long-term Care Section if 'long_term_care' in results: ltc = results['long_term_care'] response += "## 3. Long-Term Care Capacity Assessment\n\n" zone = ltc.get('zone', 'N/A') city = ltc.get('major_city', 'N/A') ratio = ltc.get('nursing_to_hospital_ratio', 0) assessment = ltc.get('capacity_assessment', 'N/A') response += f"In {zone} Zone, the major city is {city} with a nursing/residential to hospital ratio of {ratio:.2f}.\n\n" response += f"Long-term care capacity appears **{assessment}** in {city}.\n\n" if 'facility_counts' in ltc: response += "### Facility Counts\n\n" for ftype, count in ltc['facility_counts'].items(): response += f"- {ftype}: {count}\n" response += "\n" # Recommendations Section response += "## 4. Operational Recommendations\n\n" for rec in recommendations: response += f"### {rec['title']}\n\n" response += f"{rec['description']}\n\n" response += f"*Data source: {rec['data_source']}*\n\n" # AI Integration Section response += "## 5. Future Integration for Augmented AI\n\n" response += f"### {ai_integration['title']}\n\n" response += f"{ai_integration['description']}\n\n" response += f"**Example**: {ai_integration['example']}\n\n" response += "**Key metrics to incorporate**:\n" for metric in ai_integration['metrics']: response += f"- {metric}\n" response += "\n" # Provenance Section response += "## Provenance\n\n" response += "This analysis is based on:\n" response += "- Scenario description provided by the user\n" response += "- Uploaded data files\n" response += "- Calculations performed on the provided data\n" return response def handle_healthcare_scenario(scenario_text, data_registry, history): """Handle healthcare-specific scenario analysis.""" try: # Initialize analysis results results = {} # Task 1: Data preparation facilities_df = None beds_df = None for file_name in data_registry.names(): df = data_registry.get(file_name) if 'facility' in file_name.lower() or 'health' in file_name.lower(): facilities_df = df elif 'bed' in file_name.lower(): beds_df = df if facilities_df is not None: results['facility_distribution'] = analyze_facility_distribution(facilities_df) # Task 2: Bed capacity analysis if beds_df is not None: results['bed_capacity'] = analyze_bed_capacity(beds_df) # Task 3: Long-term care capacity assessment if 'zone' in beds_df.columns and 'max_percentage_decrease' in results['bed_capacity']: worst_zone = results['bed_capacity']['max_percentage_decrease'].get('zone', '') if worst_zone and facilities_df is not None: results['long_term_care'] = assess_long_term_capacity( facilities_df, beds_df, worst_zone ) # Generate operational recommendations recommendations = generate_operational_recommendations(results) # Generate future AI integration discussion ai_integration = generate_ai_integration_discussion(results) # Compile final response response = format_healthcare_analysis_response(scenario_text, results, recommendations, ai_integration) return response except Exception as e: log_event("healthcare_scenario_error", None, {"error": str(e)}) return f"Error analyzing healthcare scenario: {str(e)}" # ---------- Model loading helpers ---------- def pick_dtype_and_map(): if torch.cuda.is_available(): return torch.float16, "auto" if torch.backends.mps.is_available(): return torch.float16, {"": "mps"} return torch.float32, "cpu" @lru_cache(maxsize=1) def load_local_model(): if not HF_TOKEN: raise RuntimeError("HUGGINGFACE_HUB_TOKEN is not set.") login(token=HF_TOKEN, add_to_git_credential=False) dtype, device_map = pick_dtype_and_map() tok = AutoTokenizer.from_pretrained( MODEL_ID, token=HF_TOKEN, use_fast=True, model_max_length=8192, padding_side="left", trust_remote_code=True, cache_dir=os.environ.get("TRANSFORMERS_CACHE") ) try: mdl = AutoModelForCausalLM.from_pretrained( MODEL_ID, token=HF_TOKEN, device_map=device_map, low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True, cache_dir=os.environ.get("TRANSFORMERS_CACHE") ) except Exception: mdl = AutoModelForCausalLM.from_pretrained( MODEL_ID, token=HF_TOKEN, low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True, cache_dir=os.environ.get("TRANSFORMERS_CACHE") ) mdl.to("cuda" if torch.cuda.is_available() else "cpu") if mdl.config.eos_token_id is None and tok.eos_token_id is not None: mdl.config.eos_token_id = tok.eos_token_id return mdl, tok # ---------- Chat helpers ---------- def is_identity_query(message, history): patterns = [ r"\bwho\s+are\s+you\b", r"\bwhat\s+are\s+you\b", r"\bwhat\s+is\s+your\s+name\b", r"\bwho\s+is\s+this\b", r"\bidentify\s+yourself\b", r"\btell\s+me\s+about\s+yourself\b", r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b", r"\bwho\s+am\s+i\s+chatting\s+with\b", ] def match(t): return any(re.search(p, (t or "").strip().lower()) for p in patterns) if match(message): return True if history: last_user = history[-1][0] if isinstance(history[-1], (list, tuple)) else None if match(last_user): return True return False def _iter_user_assistant(history): for item in (history or []): if isinstance(item, (list, tuple)): u = item[0] if len(item) > 0 else "" a = item[1] if len(item) > 1 else "" yield u, a def _sanitize_text(s: str) -> str: if not isinstance(s, str): return s return re2.sub(r'[\p{C}--[\n\t]]+', '', s) def cohere_chat(message, history): if not USE_HOSTED_COHERE: return None try: client = cohere.Client(api_key=COHERE_API_KEY) parts = [] for u, a in _iter_user_assistant(history): if u: parts.append(f"User: {u}") if a: parts.append(f"Assistant: {a}") parts.append(f"User: {message}") prompt = "\n".join(parts) + "\nAssistant:" resp = client.chat( model="command-r7b-12-2024", message=prompt, temperature=0.3, max_tokens=MAX_NEW_TOKENS, ) if hasattr(resp, "text") and resp.text: return resp.text.strip() if hasattr(resp, "reply") and resp.reply: return resp.reply.strip() if hasattr(resp, "generations") and resp.generations: return resp.generations[0].text.strip() return None except Exception: return None def build_inputs(tokenizer, message, history): msgs = [{"role": "system", "content": SYSTEM_MASTER}] for u, a in _iter_user_assistant(history): if u: msgs.append({"role": "user", "content": u}) if a: msgs.append({"role": "assistant", "content": a}) msgs.append({"role": "user", "content": message}) return tokenizer.apply_chat_template( msgs, tokenize=True, add_generation_prompt=True, return_tensors="pt" ) def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS): input_ids = input_ids.to(model.device) with torch.no_grad(): out = model.generate( input_ids=input_ids, max_new_tokens=max_new_tokens, do_sample=True, temperature=0.3, top_p=0.9, repetition_penalty=1.15, pad_token_id=tokenizer.eos_token_id, eos_token_id=tokenizer.eos_token_id, ) gen_only = out[0, input_ids.shape[-1]:] return tokenizer.decode(gen_only, skip_special_tokens=True).strip() # ---------- Core chat logic ---------- def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False): try: log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}}) safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input") if blocked_in: ans = refusal_reply(reason_in) return history + [(user_msg, ans)], awaiting_answers if is_identity_query(safe_in, history): ans = "I am an AI analytical system designed to help you analyze healthcare scenarios and make data-driven decisions." return history + [(user_msg, ans)], awaiting_answers # Initialize data registry and session RAG data_registry = DataRegistry() session_rag = SessionRAG() # Process uploaded files if uploaded_files_paths: process_healthcare_data(uploaded_files_paths, data_registry) # Update session RAG with CSV columns for file_name in data_registry.names(): if file_name.endswith('.csv'): df = data_registry.get(file_name) session_rag.csv_columns = list(df.columns) # Check if this is a healthcare scenario if is_healthcare_scenario(safe_in, uploaded_files_paths): # Handle healthcare scenario directly response = handle_healthcare_scenario(safe_in, data_registry, history) return history + [(user_msg, response)], False # For non-healthcare scenarios, use the original logic # ... (Original non-healthcare scenario handling would go here) # For now, provide a fallback response response = "I can help you analyze this scenario. Please provide more details about what you'd like to analyze." return history + [(user_msg, response)], awaiting_answers except Exception as e: err = f"Error: {e}" try: traceback.print_exc() except Exception: pass return history + [(user_msg, err)], awaiting_answers # ---------- UI Setup ---------- theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg) custom_css = """ :root { --brand-bg: #0f172a; --brand-accent: #0d9488; --brand-text: #0f172a; --brand-text-light: #ffffff; } html, body, .gradio-container { height: 100vh; } .gradio-container { background: var(--brand-bg); display: flex; flex-direction: column; } /* HERO (landing) */ #hero-wrap { height: 70vh; display: grid; place-items: center; } #hero { text-align: center; } #hero h2 { color: #0f172a; font-weight: 800; font-size: 32px; margin-bottom: 22px; } #hero .search-row { width: min(860px, 92vw); margin: 0 auto; display: flex; gap: 8px; align-items: stretch; } #hero .search-row .hero-box { flex: 1 1 auto; } #hero .search-row .hero-box textarea { height: 52px !important; } #hero-send > button { height: 52px !important; padding: 0 18px !important; border-radius: 12px !important; } #hero .hint { color: #334155; margin-top: 10px; font-size: 13px; opacity: 0.9; } /* CHAT */ #chat-container { position: relative; } .chatbot header, .chatbot .label, .chatbot .label-wrap { display: none !important; } .message.user, .message.bot { background: var(--brand-accent) !important; color: var(--brand-text-light) !important; border-radius: 12px !important; padding: 8px 12px !important; } textarea, input, .gr-input { border-radius: 12px !important; } /* Chat input row equal heights */ #chat-input-row { align-items: stretch; } #chat-msg textarea { height: 52px !important; } #chat-send > button, #chat-clear > button { height: 52px !important; padding: 0 18px !important; border-radius: 12px !important; } """ # ---------- Main App ---------- with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo: # --- HERO (initial screen) --- with gr.Column(elem_id="hero-wrap", visible=True) as hero_wrap: with gr.Column(elem_id="hero"): gr.HTML("

What healthcare scenario can I help you analyze?

") with gr.Row(elem_classes="search-row"): hero_msg = gr.Textbox( placeholder="Describe your healthcare scenario or upload data files for analysis…", show_label=False, lines=1, elem_classes="hero-box" ) hero_send = gr.Button("➤", scale=0, elem_id="hero-send") gr.Markdown('
Upload healthcare data files (CSV, PDF, etc.) and describe your scenario for comprehensive analysis.
') # --- MAIN APP (hidden until first message) --- with gr.Column(elem_id="chat-container", visible=False) as app_wrap: chat = gr.Chatbot(label="", show_label=False, height="80vh") with gr.Row(): uploads = gr.Files( label="Upload healthcare data files", file_types=["file"], file_count="multiple", height=68 ) with gr.Row(elem_id="chat-input-row"): msg = gr.Textbox( label="", show_label=False, placeholder="Continue the conversation. Provide additional details or answer clarifying questions.", scale=10, elem_id="chat-msg", lines=1, ) send = gr.Button("Send", scale=1, elem_id="chat-send") clear = gr.Button("Clear chat", scale=1, elem_id="chat-clear") # ---- State state_history = gr.State(value=[]) state_uploaded = gr.State(value=[]) state_awaiting = gr.State(value=False) # ---- Uploads def _store_uploads(files, current): paths = [] for f in (files or []): paths.append(getattr(f, "name", None) or f) return (current or []) + paths uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded) # ---- Core send (used by both hero input and chat input) def _on_send(user_msg, history, up_paths, awaiting): try: if not user_msg or not user_msg.strip(): return history, "", history, awaiting new_history, new_awaiting = clarityops_reply( user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting ) return new_history, "", new_history, new_awaiting except Exception as e: err = f"Error: {e}" try: traceback.print_exc() except Exception: pass new_hist = (history or []) + [(user_msg or "", err)] return new_hist, "", new_hist, awaiting # ---- Hero -> App transition + first send def _hero_start(user_msg, history, up_paths, awaiting): chat_o, msg_o, hist_o, await_o = _on_send(user_msg, history, up_paths, awaiting) return ( chat_o, msg_o, hist_o, await_o, gr.update(visible=False), gr.update(visible=True), "" ) hero_send.click( _hero_start, inputs=[hero_msg, state_history, state_uploaded, state_awaiting], outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg], concurrency_limit=2, queue=True ) hero_msg.submit( _hero_start, inputs=[hero_msg, state_history, state_uploaded, state_awaiting], outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg], concurrency_limit=2, queue=True ) # ---- Normal chat interactions after hero is gone send.click(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting], outputs=[chat, msg, state_history, state_awaiting], concurrency_limit=2, queue=True) msg.submit(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting], outputs=[chat, msg, state_history, state_awaiting], concurrency_limit=2, queue=True) def _on_clear(): return ( [], "", [], False, gr.update(visible=True), gr.update(visible=False), "" ) clear.click(_on_clear, None, [chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg]) if __name__ == "__main__": port = int(os.environ.get("PORT", "7860")) demo.launch(server_name="0.0.0.0", server_port=port, show_api=False, max_threads=40)