Rajan Sharma
committed
Update app.py
app.py
CHANGED
@@ -1,5 +1,4 @@
 # app.py
-# app.py
 import os, traceback, regex as re2
 import gradio as gr
 import pandas as pd
@@ -18,11 +17,9 @@ from llm_router import generate_narrative, cohere_chat, open_fallback_chat
 
 def _sanitize_text(s: str) -> str:
     if not isinstance(s, str): return s
-    # strip control chars (keep newlines/tabs)
     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
 
 def _dataset_catalog(results: Dict[str, Any]) -> Dict[str, List[str]]:
-    """Expose available columns per dataset to the planner."""
     cat: Dict[str, List[str]] = {}
     for k, v in results.items():
         if isinstance(v, pd.DataFrame):
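One note on the sanitizer kept above: the character-class subtraction in [\p{C}--[\n\t]] is a regex-module extension that only takes effect under VERSION1 semantics; under the default V0 parsing the class is read quite differently and the newline/tab carve-out is lost. A minimal sketch of a defensive variant, assuming the app does not set regex.DEFAULT_VERSION elsewhere, pins the flag inline:

    import regex as re2

    def sanitize_v1(s: str) -> str:
        # \p{C} matches Unicode "Other" (control/format/etc.); the (?V1)
        # flag enables set subtraction so \n and \t are kept.
        return re2.sub(r'(?V1)[\p{C}--[\n\t]]+', '', s)

    print(repr(sanitize_v1("a\x00b\tc\nd")))  # 'ab\tc\nd'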
@@ -30,14 +27,12 @@ def _dataset_catalog(results: Dict[str, Any]) -> Dict[str, List[str]]:
     return cat
 
 def is_healthcare_scenario(text: str, has_files: bool) -> bool:
-    """Heuristic: scenario mode when user provided files + scenario-ish text."""
     t = (text or "").lower()
     kws = HEALTHCARE_SETTINGS["healthcare_keywords"]
     structured = any(s in t for s in ["background", "situation", "tasks", "deliverables"])
     return has_files and (structured or any(k in t for k in kws))
 
 def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
-    """Return a new history list with one message appended."""
     return (history_messages or []) + [{"role": role, "content": content}]
 
 def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
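The heuristic above gates on attachments first: no files, no scenario mode, regardless of how scenario-like the text reads. An illustrative check (the keyword list here is a stand-in; the real one comes from HEALTHCARE_SETTINGS):

    HEALTHCARE_SETTINGS = {"healthcare_keywords": ["wait times", "triage", "ed"]}

    def is_healthcare_scenario(text, has_files):
        t = (text or "").lower()
        kws = HEALTHCARE_SETTINGS["healthcare_keywords"]
        structured = any(s in t for s in ["background", "situation", "tasks", "deliverables"])
        return has_files and (structured or any(k in t for k in kws))

    print(is_healthcare_scenario("Background: ED triage backlog", True))   # True
    print(is_healthcare_scenario("Background: ED triage backlog", False))  # False (files required)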
@@ -49,10 +44,10 @@ def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
         new_hist = _append_msg(new_hist, "assistant", reply)
         return new_hist, ""
 
-    # Normalize files -> paths
+    # Normalize files -> paths
     file_paths = [getattr(f, "name", None) or f for f in (files or [])]
 
-    # Register CSVs
+    # Register CSVs
     registry = DataRegistry()
     for p in file_paths:
         try:
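The normalization line accepts both upload objects (which expose a .name path) and the plain strings produced once gr.Files is configured with type="filepath" further down. A quick sketch of the two shapes it handles (FakeUpload is a hypothetical stand-in):

    class FakeUpload:
        name = "/tmp/clinics.csv"  # tempfile-backed uploads carry .name

    files = [FakeUpload(), "/tmp/visits.csv"]
    file_paths = [getattr(f, "name", None) or f for f in (files or [])]
    print(file_paths)  # ['/tmp/clinics.csv', '/tmp/visits.csv']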
@@ -60,36 +55,34 @@ def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
         except Exception as e:
             log_event("ingest_error", None, {"file": p, "err": str(e)})
 
-    # RAG ingest (
+    # RAG ingest (text only; safe on empty)
     rag = RAGIndex()
     ing = extract_text_from_files(file_paths)
     rag.add(ing.get("chunks", []))
 
-    # Scenario
+    # Scenario flow
     if is_healthcare_scenario(safe_in, bool(file_paths)) and USE_SCENARIO_ENGINE:
         analyzer = HealthcareAnalyzer(registry)
-        datasets = analyzer.comprehensive_analysis(safe_in)
+        datasets = analyzer.comprehensive_analysis(safe_in)
         catalog = _dataset_catalog(datasets)
 
-        #
+        # LLM → plan (Cohere API)
         plan = parse_to_plan(safe_in, catalog)
 
-        #
+        # Deterministic execution
        structured_md = ScenarioEngine.execute_plan(plan, datasets)
 
-        #
+        # Narrative via Cohere API (fallback only if enabled)
         rag_hits = [txt for txt, _ in rag.retrieve(safe_in, k=6)]
         narrative = generate_narrative(safe_in, structured_md, rag_hits)
 
-
-        reply = _sanitize_text(final)
+        reply = _sanitize_text(f"{structured_md}\n\n# Narrative & Recommendations\n\n{narrative}")
     else:
-        # General
+        # General chat via Cohere API
         prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
         reply = cohere_chat(prompt) or open_fallback_chat(prompt) or "How can I help further?"
         reply = _sanitize_text(reply)
 
-    # Append user then assistant messages to history
     new_hist = _append_msg(history_messages, "user", user_msg)
     new_hist = _append_msg(new_hist, "assistant", reply)
     return new_hist, ""
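The substantive fix in this hunk is the reply assembly: the old scenario branch computed structured_md and narrative but then sanitized an undefined name, final, which would raise NameError at runtime; the new line builds the reply from the pieces explicitly. In the general branch, note that the or-chain treats any falsy return as a miss, so an empty completion also falls through to the next option. A sketch with stand-in stubs (hypothetical; the real clients live in llm_router):

    def cohere_chat(prompt):
        return ""    # e.g. an empty completion

    def open_fallback_chat(prompt):
        return None  # e.g. fallback disabled or unavailable

    reply = cohere_chat("hi") or open_fallback_chat("hi") or "How can I help further?"
    print(reply)  # How can I help further?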
@@ -103,9 +96,8 @@ def handle(user_msg: str, history_messages: List[Dict[str, str]], files: list) -> Tuple[List[Dict[str, str]], str]:
 
 # -------- UI --------
 with gr.Blocks(analytics_enabled=False) as demo:
-    gr.Markdown("## Canadian Healthcare AI • Scenario-Agnostic
-
-    chat = gr.Chatbot(type="messages", height=520)
+    gr.Markdown("## Canadian Healthcare AI • Cohere API • Scenario-Agnostic • Deterministic analytics")
+    chat = gr.Chatbot(type="messages", height=520)  # OpenAI-style role/content
     files = gr.Files(
         file_count="multiple",
         type="filepath",
@@ -116,7 +108,6 @@
     clear = gr.Button("Clear")
 
     def _on_send(m, h, f):
-        # h is already a list of {'role','content'} dicts with type="messages"
         h2, _ = handle(m, h or [], f or [])
         return h2, ""
 
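The diff ends before the event wiring, so for orientation here is a minimal sketch of how _on_send would typically be bound; the msg textbox and send button names are assumptions, not part of this commit:

    import gradio as gr

    with gr.Blocks(analytics_enabled=False) as demo:
        chat = gr.Chatbot(type="messages", height=520)
        files = gr.Files(file_count="multiple", type="filepath")
        msg = gr.Textbox(placeholder="Describe the scenario or ask a question")
        send = gr.Button("Send")
        clear = gr.Button("Clear")

        def _on_send(m, h, f):
            h2, _ = handle(m, h or [], f or [])  # handle() as defined in app.py
            return h2, ""

        send.click(_on_send, inputs=[msg, chat, files], outputs=[chat, msg])
        clear.click(lambda: ([], ""), outputs=[chat, msg])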