Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,9 @@ from functools import lru_cache
|
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
from settings import SNAPSHOT_PATH, PERSIST_CONTENT
|
| 8 |
from audit_log import log_event, hash_summary
|
| 9 |
from privacy import redact_text
|
|
@@ -91,12 +94,20 @@ def _history_to_prompt(message, history):
|
|
| 91 |
parts.append("Assistant:")
|
| 92 |
return "\n".join(parts)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
# ---------- Cohere (default path) ----------
|
| 95 |
def cohere_chat(message, history):
|
| 96 |
if not USE_HOSTED_COHERE:
|
| 97 |
return None
|
| 98 |
try:
|
| 99 |
-
# Create client on demand to avoid init errors
|
| 100 |
client = cohere.Client(api_key=COHERE_API_KEY)
|
| 101 |
prompt = _history_to_prompt(message, history)
|
| 102 |
resp = client.chat(
|
|
@@ -198,19 +209,37 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths):
|
|
| 198 |
try:
|
| 199 |
log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
|
| 200 |
|
|
|
|
| 201 |
safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
|
| 202 |
if blocked_in:
|
| 203 |
ans = refusal_reply(reason_in)
|
| 204 |
return history + [(user_msg, ans)]
|
| 205 |
|
|
|
|
| 206 |
if is_identity_query(safe_in, history):
|
| 207 |
ans = "I am ClarityOps, your strategic decision making AI partner."
|
| 208 |
return history + [(user_msg, ans)]
|
| 209 |
|
| 210 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
if uploaded_files_paths:
|
| 212 |
ing = extract_text_from_files(uploaded_files_paths)
|
| 213 |
-
chunks = ing.get("chunks", []) if isinstance(ing, dict) else (
|
| 214 |
artifacts = ing.get("artifacts", []) if isinstance(ing, dict) else []
|
| 215 |
if chunks:
|
| 216 |
_session_rag.add_docs(chunks)
|
|
@@ -218,12 +247,22 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths):
|
|
| 218 |
_session_rag.register_artifacts(artifacts)
|
| 219 |
log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
|
| 220 |
|
| 221 |
-
#
|
| 222 |
if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
|
| 223 |
cols = _session_rag.get_latest_csv_columns()
|
| 224 |
if cols:
|
| 225 |
return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))]
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
# Retrieve from session uploads (text chunks)
|
| 228 |
session_snips = "\n---\n".join(_session_rag.retrieve(
|
| 229 |
"diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics bed flow staffing discharge forecast",
|
|
@@ -261,15 +300,19 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths):
|
|
| 261 |
inputs = build_inputs(tokenizer, augmented_user, history)
|
| 262 |
out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
|
| 263 |
|
|
|
|
| 264 |
if isinstance(out, str):
|
| 265 |
for tag in ("Assistant:", "System:", "User:"):
|
| 266 |
if out.startswith(tag):
|
| 267 |
out = out[len(tag):].strip()
|
|
|
|
| 268 |
|
|
|
|
| 269 |
safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
|
| 270 |
if blocked_out:
|
| 271 |
safe_out = refusal_reply(reason_out)
|
| 272 |
|
|
|
|
| 273 |
log_event("assistant_reply", None, {
|
| 274 |
**hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
|
| 275 |
**hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
|
|
@@ -360,3 +403,4 @@ if __name__ == "__main__":
|
|
| 360 |
|
| 361 |
|
| 362 |
|
|
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
|
| 7 |
+
# NEW: robust control-char sanitizer (requires `regex` package)
|
| 8 |
+
import regex as re2 # pip install regex
|
| 9 |
+
|
| 10 |
from settings import SNAPSHOT_PATH, PERSIST_CONTENT
|
| 11 |
from audit_log import log_event, hash_summary
|
| 12 |
from privacy import redact_text
|
|
|
|
| 94 |
parts.append("Assistant:")
|
| 95 |
return "\n".join(parts)
|
| 96 |
|
| 97 |
+
def _sanitize_text(s: str) -> str:
|
| 98 |
+
"""
|
| 99 |
+
Strip control characters (except newline/tab) to avoid garbled UI output.
|
| 100 |
+
"""
|
| 101 |
+
if not isinstance(s, str):
|
| 102 |
+
return s
|
| 103 |
+
return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
|
| 104 |
+
|
| 105 |
# ---------- Cohere (default path) ----------
|
| 106 |
def cohere_chat(message, history):
|
| 107 |
if not USE_HOSTED_COHERE:
|
| 108 |
return None
|
| 109 |
try:
|
| 110 |
+
# Create client on demand to avoid init errors in some environments
|
| 111 |
client = cohere.Client(api_key=COHERE_API_KEY)
|
| 112 |
prompt = _history_to_prompt(message, history)
|
| 113 |
resp = client.chat(
|
|
|
|
| 209 |
try:
|
| 210 |
log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
|
| 211 |
|
| 212 |
+
# Safety (input)
|
| 213 |
safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
|
| 214 |
if blocked_in:
|
| 215 |
ans = refusal_reply(reason_in)
|
| 216 |
return history + [(user_msg, ans)]
|
| 217 |
|
| 218 |
+
# Identity short-circuit
|
| 219 |
if is_identity_query(safe_in, history):
|
| 220 |
ans = "I am ClarityOps, your strategic decision making AI partner."
|
| 221 |
return history + [(user_msg, ans)]
|
| 222 |
|
| 223 |
+
# Debug slash command: /diag
|
| 224 |
+
if (safe_in or "").strip().lower().startswith("/diag"):
|
| 225 |
+
try:
|
| 226 |
+
chunk_count = len(getattr(_session_rag, "texts", []) or [])
|
| 227 |
+
cols = _session_rag.get_latest_csv_columns()
|
| 228 |
+
sample = _session_rag.retrieve("the", k=2)
|
| 229 |
+
msg = [
|
| 230 |
+
f"Chunks in session: {chunk_count}",
|
| 231 |
+
f"Latest CSV columns: {', '.join(cols) if cols else '<none>'}",
|
| 232 |
+
"Sample retrieved snippets:",
|
| 233 |
+
*(sample or ["<no snippets>"])
|
| 234 |
+
]
|
| 235 |
+
return history + [(user_msg, "\n\n".join(msg))]
|
| 236 |
+
except Exception as e:
|
| 237 |
+
return history + [(user_msg, f"Diag error: {e}")]
|
| 238 |
+
|
| 239 |
+
# Ingest uploads: returns chunks + artifacts
|
| 240 |
if uploaded_files_paths:
|
| 241 |
ing = extract_text_from_files(uploaded_files_paths)
|
| 242 |
+
chunks = ing.get("chunks", []) if isinstance(ing, dict) else (inf or [])
|
| 243 |
artifacts = ing.get("artifacts", []) if isinstance(ing, dict) else []
|
| 244 |
if chunks:
|
| 245 |
_session_rag.add_docs(chunks)
|
|
|
|
| 247 |
_session_rag.register_artifacts(artifacts)
|
| 248 |
log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
|
| 249 |
|
| 250 |
+
# Deterministic CSV "columns/headers" handler
|
| 251 |
if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
|
| 252 |
cols = _session_rag.get_latest_csv_columns()
|
| 253 |
if cols:
|
| 254 |
return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))]
|
| 255 |
|
| 256 |
+
# Heuristic: scenario mode nudge if a long case study was pasted
|
| 257 |
+
plain = (safe_in or "").strip().lower()
|
| 258 |
+
looks_like_case = ("background" in plain and "objective" in plain) or ("case study" in plain)
|
| 259 |
+
if looks_like_case and len(plain) > 600:
|
| 260 |
+
safe_in += (
|
| 261 |
+
"\n\nPlease analyze the scenario above using the Expected Output Format: "
|
| 262 |
+
"produce structured recommendations, estimates and assumptions, include tables and bullet points, "
|
| 263 |
+
"and explicitly state how uploaded files (CSV/docs) influenced your estimates."
|
| 264 |
+
)
|
| 265 |
+
|
| 266 |
# Retrieve from session uploads (text chunks)
|
| 267 |
session_snips = "\n---\n".join(_session_rag.retrieve(
|
| 268 |
"diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics bed flow staffing discharge forecast",
|
|
|
|
| 300 |
inputs = build_inputs(tokenizer, augmented_user, history)
|
| 301 |
out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
|
| 302 |
|
| 303 |
+
# Tidy echoes and sanitize
|
| 304 |
if isinstance(out, str):
|
| 305 |
for tag in ("Assistant:", "System:", "User:"):
|
| 306 |
if out.startswith(tag):
|
| 307 |
out = out[len(tag):].strip()
|
| 308 |
+
out = _sanitize_text(out)
|
| 309 |
|
| 310 |
+
# Safety (output)
|
| 311 |
safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
|
| 312 |
if blocked_out:
|
| 313 |
safe_out = refusal_reply(reason_out)
|
| 314 |
|
| 315 |
+
# Audit (content-free fingerprints)
|
| 316 |
log_event("assistant_reply", None, {
|
| 317 |
**hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
|
| 318 |
**hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
|
|
|
|
| 403 |
|
| 404 |
|
| 405 |
|
| 406 |
+
|