File size: 5,993 Bytes
aae6699
325f883
40db972
325f883
aae6699
325f883
dddc062
 
 
 
325f883
 
c1ff5e2
 
ff957d1
325f883
d5495e2
325f883
dddc062
325f883
 
 
 
 
aae6699
 
325f883
 
 
 
 
 
 
 
 
 
 
a2b1fdb
 
 
dddc062
 
 
 
 
 
 
 
 
 
 
 
 
a2b1fdb
325f883
 
aae6699
a2b1fdb
 
 
 
325f883
69f5489
325f883
 
69f5489
325f883
 
aae6699
 
 
 
325f883
69f5489
325f883
 
 
 
69f5489
325f883
 
69f5489
325f883
 
 
 
 
 
 
 
dddc062
 
a2b1fdb
dddc062
a2b1fdb
 
 
 
 
 
 
325f883
 
 
 
a2b1fdb
 
 
325f883
aae6699
325f883
69f5489
dddc062
 
 
 
 
 
 
 
325f883
 
 
 
 
 
 
 
dddc062
325f883
 
 
 
 
dddc062
 
 
 
 
 
 
 
325f883
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# app.py
import os, traceback, regex as re2
import gradio as gr
import pandas as pd
from typing import List, Tuple, Dict, Any

from settings import (
    HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT, USE_SCENARIO_ENGINE,
    DEBUG_PLAN, COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
)
from audit_log import log_event
from privacy import safety_filter, refusal_reply
from data_registry import DataRegistry
from upload_ingest import extract_text_from_files
from healthcare_analysis import HealthcareAnalyzer
from scenario_planner import parse_to_plan
from scenario_engine import ScenarioEngine
from rag import RAGIndex
from llm_router import generate_narrative, cohere_chat, open_fallback_chat, _co_client

def _sanitize_text(s: str) -> str:
    if not isinstance(s, str): return s
    return re2.sub(r'[\p{C}--[\n\t]]+', '', s)

def _dataset_catalog(results: Dict[str, Any]) -> Dict[str, List[str]]:
    cat: Dict[str, List[str]] = {}
    for k, v in results.items():
        if isinstance(v, pd.DataFrame):
            cat[k] = v.columns.tolist()
    return cat

def is_healthcare_scenario(text: str, has_files: bool) -> bool:
    """Decide whether a message should go through the scenario pipeline.

    A message qualifies only when files accompany it AND its lower-cased text
    either mentions one of the scenario section words (background, situation,
    tasks, deliverables) or contains any configured healthcare keyword.
    """
    lowered = (text or "").lower()
    keywords = HEALTHCARE_SETTINGS["healthcare_keywords"]

    section_words = ["background", "situation", "tasks", "deliverables"]
    looks_structured = any(word in lowered for word in section_words)

    # Without uploaded files the scenario flow never applies.
    if not has_files:
        return has_files
    if looks_structured:
        return True
    return any(keyword in lowered for keyword in keywords)

def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
    return (history_messages or []) + [{"role": role, "content": content}]

def ping_cohere():
    """Probe the Cohere backend and describe the outcome as a status string.

    Obtains the client via ``_co_client`` and, when one is available, embeds
    two short strings with ``cohere_embed``. Any exception raised along the
    way is reported in the returned text instead of propagating.
    """
    try:
        client = _co_client()
        if client:
            from llm_router import cohere_embed
            vectors = cohere_embed(["hello", "world"])
            # Success means one vector came back for each of the two inputs.
            if vectors and len(vectors) == 2:
                status = "Cohere OK ✅ (embed call succeeded)"
            else:
                status = "Cohere reachable, but embedding returned no vectors."
        else:
            status = "Cohere client not initialized. Is COHERE_API_KEY set?"
        return status
    except Exception as exc:
        return f"Cohere ping failed: {exc}"

def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
    """Process one chat turn and return ``(updated_history, "")``.

    The message is first screened with ``safety_filter``; a blocked message is
    answered with ``refusal_reply`` and nothing else runs. Otherwise the
    uploaded files are registered in a fresh ``DataRegistry`` and their text
    chunks added to a fresh ``RAGIndex``. The reply then comes from the
    scenario pipeline (when ``is_healthcare_scenario`` matches and
    ``USE_SCENARIO_ENGINE`` is truthy) or from a general chat completion.

    Args:
        user_msg: Raw text entered by the user.
        history_messages: Earlier messages as ``{"role", "content"}`` dicts;
            may be empty or ``None``.
        files: Uploaded files, given as paths or as objects exposing the path
            through a ``name`` attribute; may be empty or ``None``.

    Returns:
        The history extended with the user message and the assistant reply,
        plus an empty string.

    Any exception raised while handling the turn is logged through
    ``log_event`` together with its traceback and answered with a short
    ``Error: ...`` message. The traceback itself is kept out of the chat so
    internal paths and code are not shown to end users.
    """
    def _respond(reply: str) -> Tuple[List[Dict[str, str]], str]:
        # Record the user's original message followed by the assistant reply.
        with_user = _append_msg(history_messages, "user", user_msg)
        return _append_msg(with_user, "assistant", reply), ""

    try:
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return _respond(refusal_reply(reason_in))

        # Normalize files -> paths
        file_paths = [getattr(f, "name", None) or f for f in (files or [])]

        # Register CSVs; a file that fails to load is logged and skipped so
        # the remaining files are still processed.
        registry = DataRegistry()
        for p in file_paths:
            try:
                registry.add_path(p)
            except Exception as e:
                log_event("ingest_error", None, {"file": p, "err": str(e)})

        # RAG ingest (text only; safe on empty)
        rag = RAGIndex()
        ing = extract_text_from_files(file_paths)
        rag.add(ing.get("chunks", []))

        # Scenario flow
        if is_healthcare_scenario(safe_in, bool(file_paths)) and USE_SCENARIO_ENGINE:
            analyzer = HealthcareAnalyzer(registry)
            datasets = analyzer.comprehensive_analysis(safe_in)
            catalog = _dataset_catalog(datasets)

            plan = parse_to_plan(safe_in, catalog)
            structured_md = ScenarioEngine.execute_plan(plan, datasets)

            rag_hits = [txt for txt, _ in rag.retrieve(safe_in, k=6)]
            narrative = generate_narrative(safe_in, structured_md, rag_hits)

            # Planner notes are appended only when DEBUG_PLAN is on and the
            # plan actually carries notes.
            plan_notes = getattr(plan, "notes", None)
            debug_note = f"\n\n> **Planner note:** {plan_notes}" if (DEBUG_PLAN and plan_notes) else ""
            reply = _sanitize_text(f"{structured_md}\n\n# Narrative & Recommendations\n\n{narrative}{debug_note}")
        else:
            # General conversation via Cohere, then the open fallback, then a
            # fixed default when both return nothing.
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
            reply = cohere_chat(prompt) or open_fallback_chat(prompt) or "How can I help further?"
            reply = _sanitize_text(reply)

        return _respond(reply)

    except Exception as e:
        # Keep the full traceback in the audit log only; the user sees just
        # the exception message.
        tb = traceback.format_exc()
        log_event("app_error", None, {"err": str(e), "tb": tb})
        return _respond(f"Error: {e}")

# -------- UI --------
# Build the Gradio UI: a diagnostics row, the chat panel, a file uploader,
# the message box, and Send / Clear buttons, then wire their events.
with gr.Blocks(analytics_enabled=False) as demo:
    gr.Markdown("## Canadian Healthcare AI • Cohere API • Scenario-Agnostic • Deterministic analytics")

    # Diagnostics row: a button that runs ping_cohere() and a Markdown area
    # that shows the returned status text.
    with gr.Row():
        ping_btn = gr.Button("Ping Cohere")
        ping_out = gr.Markdown()

    # Chat history uses the "messages" format, i.e. the role/content dicts
    # that handle() builds.
    chat = gr.Chatbot(type="messages", height=520)
    # Accepted upload extensions come from HEALTHCARE_SETTINGS["supported_file_types"].
    files = gr.Files(file_count="multiple", type="filepath", file_types=HEALTHCARE_SETTINGS["supported_file_types"])
    msg = gr.Textbox(placeholder="Paste any scenario (Background / Situation / Tasks / Deliverables) or just chat.")
    send = gr.Button("Send")
    clear = gr.Button("Clear")

    def _on_send(m, h, f):
        """Run one chat turn through handle() and clear the message box.

        m is the message text, h the current chat history, and f the uploaded
        files; a missing history or file list is replaced with an empty list.
        Returns the updated history and an empty string for the textbox.
        """
        h2, _ = handle(m, h or [], f or [])
        return h2, ""

    ping_btn.click(lambda: ping_cohere(), outputs=[ping_out])
    # The Send button and pressing Enter in the textbox trigger the same handler.
    send.click(_on_send, inputs=[msg, chat, files], outputs=[chat, msg])
    msg.submit(_on_send, inputs=[msg, chat, files], outputs=[chat, msg])
    # Clear resets both the chat history and the message box.
    clear.click(lambda: ([], ""), outputs=[chat, msg])

if __name__ == "__main__":
    # Script entry point: log the effective Cohere configuration once, then
    # serve the UI on all interfaces at $PORT (7860 when unset).
    from audit_log import log_event

    startup_info = {
        "cohere_key_present": bool(os.getenv("COHERE_API_KEY")),
        "cohere_model": COHERE_MODEL_PRIMARY,
        "timeout_s": COHERE_TIMEOUT_S,
        "open_fallbacks": USE_OPEN_FALLBACKS
    }
    log_event("startup", None, startup_info)

    gr.set_static_paths({})

    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)