Rajan Sharma committed on
Commit b9b3e60 · verified · 1 Parent(s): d312515

Update app.py

Files changed (1)
  1. app.py +58 -102
app.py CHANGED
@@ -1,10 +1,8 @@
- import os, re, json, traceback
  from functools import lru_cache

  import gradio as gr
  import torch
-
- # NEW: robust control-char sanitizer (requires `regex` package)
  import regex as re2  # pip install regex

  from settings import SNAPSHOT_PATH, PERSIST_CONTENT
@@ -12,29 +10,26 @@ from audit_log import log_event, hash_summary
  from privacy import redact_text

  # ---------- Environment / cache (Spaces-safe, writable) ----------
- # Hugging Face caches
- os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
- os.environ.setdefault("HF_HUB_CACHE", "/data/.cache/huggingface/hub")
- os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
-
- # SentenceTransformers cache (used by retriever.py)
- os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/data/.cache/sentence-transformers")
-
- # Gradio temp/cache
- os.environ.setdefault("GRADIO_TEMP_DIR", "/data/gradio")
- os.environ.setdefault("GRADIO_CACHE_DIR", "/data/gradio")
-
- # Disable experimental xet transport; use stable transfer
  os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

- for p in [
-     "/data/.cache/huggingface",
-     "/data/.cache/huggingface/hub",
-     "/data/.cache/huggingface/transformers",
-     "/data/.cache/sentence-transformers",
-     "/data/gradio",
- ]:
      try:
          os.makedirs(p, exist_ok=True)
      except Exception:
@@ -114,15 +109,9 @@ def pick_dtype_and_map():

  def is_identity_query(message, history):
      patterns = [
-         r"\bwho\s+are\s+you\b",
-         r"\bwhat\s+are\s+you\b",
-         r"\bwhat\s+is\s+your\s+name\b",
-         r"\bwho\s+is\s+this\b",
-         r"\bidentify\s+yourself\b",
-         r"\btell\s+me\s+about\s+yourself\b",
-         r"\bdescribe\s+yourself\b",
-         r"\band\s+you\s*\?\b",
-         r"\byour\s+name\b",
          r"\bwho\s+am\s+i\s+chatting\s+with\b",
      ]
      def match(t): return any(re.search(p, (t or "").strip().lower()) for p in patterns)
@@ -139,13 +128,13 @@ def _iter_user_assistant(history):
          a = item[1] if len(item) > 1 else ""
          yield u, a

  def _history_to_prompt(message, history):
-     """
-     Build a simple chat-style prompt INCLUDING the System Master preamble.
-     """
-     parts = []
-     # system master always first
-     parts.append(f"System: {SYSTEM_MASTER}")
      for u, a in _iter_user_assistant(history):
          if u: parts.append(f"User: {u}")
          if a: parts.append(f"Assistant: {a}")
@@ -153,20 +142,11 @@ def _history_to_prompt(message, history):
      parts.append("Assistant:")
      return "\n".join(parts)

- def _sanitize_text(s: str) -> str:
-     """
-     Strip control characters (except newline/tab) to avoid garbled UI output.
-     """
-     if not isinstance(s, str):
-         return s
-     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
-
- # ---------- Cohere (default path) ----------
  def cohere_chat(message, history):
      if not USE_HOSTED_COHERE:
          return None
      try:
-         # Create client on demand to avoid init errors in some environments
          client = cohere.Client(api_key=COHERE_API_KEY)
          prompt = _history_to_prompt(message, history)
          resp = client.chat(
@@ -182,7 +162,7 @@ def cohere_chat(message, history):
      except Exception:
          return None

- # ---------- Local model (accelerate-safe fallback) ----------
  @lru_cache(maxsize=1)
  def load_local_model():
      if not HF_TOKEN:
@@ -192,16 +172,19 @@ def load_local_model():
      tok = AutoTokenizer.from_pretrained(
          MODEL_ID, token=HF_TOKEN, use_fast=True, model_max_length=8192,
          padding_side="left", trust_remote_code=True,
      )
      try:
          mdl = AutoModelForCausalLM.from_pretrained(
              MODEL_ID, token=HF_TOKEN, device_map=device_map,
              low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True,
          )
      except Exception:
          mdl = AutoModelForCausalLM.from_pretrained(
              MODEL_ID, token=HF_TOKEN,
              low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True,
          )
      mdl.to("cuda" if torch.cuda.is_available() else "cpu")
      if mdl.config.eos_token_id is None and tok.eos_token_id is not None:
@@ -209,9 +192,7 @@ def load_local_model():
      return mdl, tok

  def build_inputs(tokenizer, message, history):
-     msgs = []
-     # Always inject system master into the chat template, if supported
-     msgs.append({"role": "system", "content": SYSTEM_MASTER})
      for u, a in _iter_user_assistant(history):
          if u: msgs.append({"role": "user", "content": u})
          if a: msgs.append({"role": "assistant", "content": a})
@@ -233,7 +214,7 @@ def local_generate(model, tokenizer, input_ids, max_new_tokens=MAX_NEW_TOKENS):
      gen_only = out[0, input_ids.shape[-1]:]
      return tokenizer.decode(gen_only, skip_special_tokens=True).strip()

- # ---------- Snapshot loader ----------
  def _load_snapshot(path=SNAPSHOT_PATH):
      try:
          with open(path, "r", encoding="utf-8") as f:
@@ -248,11 +229,9 @@ def _load_snapshot(path=SNAPSHOT_PATH):
          "isolation_needs_waiting": {"contact": 3, "airborne": 1}, "telemetry_needed_waiting": 5
      }

- # ---------- Init retrieval engines ----------
  init_retriever()
- _session_rag = SessionRAG()  # in-memory; supports artifacts (CSV columns)

- # ---------- Executive pre-compute (MDSi block) ----------
  def _mdsi_block():
      base_capacity = capacity_projection(18, 48, 6)
      cons_capacity = capacity_projection(12, 48, 6)
@@ -265,12 +244,12 @@ def _mdsi_block():
          "outcomes_summary": outcomes
      }, indent=2)

- # ---------- Core chat logic with two-phase behavior ----------
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
      """
      awaiting_answers:
-     - False: Phase 1 mode -> generate clarification questions and WAIT
-     - True: Phase 2 mode -> consume clarifications and produce structured analysis
      """
      try:
          log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
@@ -286,23 +265,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
          ans = "I am ClarityOps, your strategic decision making AI partner."
          return history + [(user_msg, ans)], awaiting_answers

-         # Debug slash command: /diag
-         if (safe_in or "").strip().lower().startswith("/diag"):
-             try:
-                 chunk_count = len(getattr(_session_rag, "texts", []) or [])
-                 cols = _session_rag.get_latest_csv_columns()
-                 sample = _session_rag.retrieve("the", k=2)
-                 msg = [
-                     f"Chunks in session: {chunk_count}",
-                     f"Latest CSV columns: {', '.join(cols) if cols else '<none>'}",
-                     "Sample retrieved snippets:",
-                     *(sample or ["<no snippets>"])
-                 ]
-                 return history + [(user_msg, "\n\n".join(msg))], awaiting_answers
-             except Exception as e:
-                 return history + [(user_msg, f"Diag error: {e}")], awaiting_answers
-
-         # Ingest uploads: returns chunks + artifacts
          if uploaded_files_paths:
              ing = extract_text_from_files(uploaded_files_paths)
              chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
@@ -313,26 +276,24 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
              _session_rag.register_artifacts(artifacts)
              log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})

-         # Deterministic CSV "columns/headers" handler
          if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
              cols = _session_rag.get_latest_csv_columns()
              if cols:
                  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers

-         # Retrieve from session uploads (text chunks)
          session_snips = "\n---\n".join(_session_rag.retrieve(
-             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics bed flow staffing discharge forecast",
              k=6
          ))

-         # Load daily snapshot + policies + computed ops numbers
          snapshot = _load_snapshot()
          policy_context = retrieve_context(
-             "mobile diabetes screening Indigenous community outreach logistics referral pathways cultural safety data governance cost effectiveness outcomes bed management discharge acceleration ambulance offload"
          )
          computed = compute_operational_numbers(snapshot)

-         # Exec scenario detect (MDSi)
          user_lower = (safe_in or "").lower()
          mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""

@@ -345,7 +306,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
              session_snips=session_snips
          )

-         # Phase-specific instruction appended to the user content
          if not awaiting_answers:
              phase_directive = (
                  "\n\n[INSTRUCTION TO MODEL]\n"
@@ -362,16 +323,14 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe

          augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + safe_in + phase_directive

-         # Cohere first
          out = cohere_chat(augmented_user, history)
-
-         # Fallback to local HF model if Cohere not set or failed
          if not out:
              model, tokenizer = load_local_model()
              inputs = build_inputs(tokenizer, augmented_user, history)
              out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)

-         # Tidy echoes and sanitize
          if isinstance(out, str):
              for tag in ("Assistant:", "System:", "User:"):
                  if out.startswith(tag):
@@ -383,16 +342,14 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
          if blocked_out:
              safe_out = refusal_reply(reason_out)

-         # Decide next state:
-         # If we just asked clarifications, set awaiting_answers=True.
-         # If we just produced structured analysis, set awaiting_answers=False.
          new_awaiting = awaiting_answers
-         if not awaiting_answers and "clarification questions" in safe_out.lower():
              new_awaiting = True
-         elif awaiting_answers and "structured analysis" in safe_out.lower():
              new_awaiting = False

-         # Audit (content-free fingerprints)
          log_event("assistant_reply", None, {
              **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
              **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
@@ -400,6 +357,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
          })

          return history + [(user_msg, safe_out)], new_awaiting
      except Exception as e:
          err = f"Error: {e}"
          try:
@@ -438,7 +396,7 @@ textarea, input, .gr-input { border-radius: 12px !important; }
  #chat-container { position: relative; }
  """

- # ---------- UI (single window; uploads at bottom) ----------
  with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
      gr.Markdown("# ClarityOps Augmented Decision AI")

@@ -466,7 +424,7 @@

      state_history = gr.State(value=[])
      state_uploaded = gr.State(value=[])
-     state_awaiting = gr.State(value=False)  # False = Phase 1 next; True = awaiting answers for Phase 2

      def _store_uploads(files, current):
          paths = []
@@ -477,19 +435,18 @@

      uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)

      def _on_send(user_msg, history, up_paths, awaiting):
-         # Hide handshake on first interaction by returning a class change
          hide_overlay_js = gr.update(value='<div id="handshake-overlay" class="hidden"></div>')
          try:
              if not user_msg or not user_msg.strip():
                  return history, "", history, awaiting, hide_overlay_js
-             new_history, new_awaiting = clarityops_reply(user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting)
              return new_history, "", new_history, new_awaiting, hide_overlay_js
          except Exception as e:
              err = f"Error: {e}"
-             try:
-                 traceback.print_exc()
-             except Exception:
-                 pass
              new_hist = (history or []) + [(user_msg or "", err)]
              return new_hist, "", new_hist, awaiting, hide_overlay_js

@@ -502,7 +459,6 @@

          concurrency_limit=2, queue=True)

      def _on_clear():
-         # Reset everything, show handshake again
          return [], "", [], False, '<div id="handshake-overlay">ClarityOps loaded. Paste your scenario and attach files. I’ll ask up to 5 clarifications, then produce the structured analysis</div>'

      clear.click(_on_clear, None, [chat, msg, state_history, state_awaiting, handshake])
 
+ import os, re, json, traceback, pathlib
  from functools import lru_cache

  import gradio as gr
  import torch
  import regex as re2  # pip install regex

  from settings import SNAPSHOT_PATH, PERSIST_CONTENT
  from privacy import redact_text

  # ---------- Environment / cache (Spaces-safe, writable) ----------
+ HOME = pathlib.Path.home()  # /home/user on Spaces
+ HF_HOME = str(HOME / ".cache" / "huggingface")
+ HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
+ HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
+ ST_HOME = str(HOME / ".cache" / "sentence-transformers")
+ GRADIO_TMP = str(HOME / "app" / "gradio")  # you can switch to "/tmp/gradio" if preferred
+ GRADIO_CACHE = GRADIO_TMP
+
+ os.environ.setdefault("HF_HOME", HF_HOME)
+ os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
+ os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS)  # deprecation warning is harmless
+ os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
+ os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
+ os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
+
+ # Disable experimental xet; prefer stable transfer
  os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

+ for p in [HF_HOME, HF_HUB_CACHE, HF_TRANSFORMERS, ST_HOME, GRADIO_TMP, GRADIO_CACHE]:
      try:
          os.makedirs(p, exist_ok=True)
      except Exception:
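
Review note: this block only takes effect if it runs before transformers / sentence-transformers are imported anywhere in the process, and setdefault keeps any platform-level overrides intact. A minimal, self-contained sketch of the same startup order (paths are illustrative, not the app's):

    import os
    import pathlib

    # Illustrative cache root; the commit derives it from pathlib.Path.home().
    CACHE_ROOT = pathlib.Path.home() / ".cache"

    # setdefault respects values already set by the platform,
    # so a Space-level HF_HOME override still wins.
    os.environ.setdefault("HF_HOME", str(CACHE_ROOT / "huggingface"))
    os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(CACHE_ROOT / "sentence-transformers"))

    # Pre-create the directories; a read-only filesystem should not
    # crash startup, hence the broad except.
    for d in (os.environ["HF_HOME"], os.environ["SENTENCE_TRANSFORMERS_HOME"]):
        try:
            os.makedirs(d, exist_ok=True)
        except OSError:
            pass

    # Import transformers only after the env vars are in place,
    # so it picks up the writable cache location.
    import transformers  # noqa: E402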
 

  def is_identity_query(message, history):
      patterns = [
+         r"\bwho\s+are\s+you\b", r"\bwhat\s+are\s+you\b", r"\bwhat\s+is\s+your\s+name\b",
+         r"\bwho\s+is\s+this\b", r"\bidentify\s+yourself\b", r"\btell\s+me\s+about\s+yourself\b",
+         r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b",
          r"\bwho\s+am\s+i\s+chatting\s+with\b",
      ]
      def match(t): return any(re.search(p, (t or "").strip().lower()) for p in patterns)
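
Side note, not part of this commit: is_identity_query rebuilds and rescans the pattern list on every call, so the same check can be precompiled into a single alternation. A hypothetical variant:

    import re

    # Hypothetical precompiled form: one case-insensitive pattern,
    # scanned once per message.
    _IDENTITY_RE = re.compile(
        r"\b(?:who\s+are\s+you|what\s+are\s+you|what\s+is\s+your\s+name"
        r"|who\s+is\s+this|identify\s+yourself|your\s+name"
        r"|who\s+am\s+i\s+chatting\s+with)\b",
        re.IGNORECASE,
    )

    def is_identity_query(message: str) -> bool:
        return bool(_IDENTITY_RE.search((message or "").strip()))

    assert is_identity_query("So, who ARE you?")
    assert not is_identity_query("What are the CSV columns?")
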
 
          a = item[1] if len(item) > 1 else ""
          yield u, a

+ def _sanitize_text(s: str) -> str:
+     if not isinstance(s, str):
+         return s
+     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
+
  def _history_to_prompt(message, history):
+     parts = [f"System: {SYSTEM_MASTER}"]
      for u, a in _iter_user_assistant(history):
          if u: parts.append(f"User: {u}")
          if a: parts.append(f"Assistant: {a}")

      parts.append("Assistant:")
      return "\n".join(parts)

+ # ---------- Cohere first ----------
  def cohere_chat(message, history):
      if not USE_HOSTED_COHERE:
          return None
      try:
          client = cohere.Client(api_key=COHERE_API_KEY)
          prompt = _history_to_prompt(message, history)
          resp = client.chat(

      except Exception:
          return None

+ # ---------- Local model (HF) ----------
  @lru_cache(maxsize=1)
  def load_local_model():
      if not HF_TOKEN:

      tok = AutoTokenizer.from_pretrained(
          MODEL_ID, token=HF_TOKEN, use_fast=True, model_max_length=8192,
          padding_side="left", trust_remote_code=True,
+         cache_dir=os.environ.get("TRANSFORMERS_CACHE")
      )
      try:
          mdl = AutoModelForCausalLM.from_pretrained(
              MODEL_ID, token=HF_TOKEN, device_map=device_map,
              low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True,
+             cache_dir=os.environ.get("TRANSFORMERS_CACHE")
          )
      except Exception:
          mdl = AutoModelForCausalLM.from_pretrained(
              MODEL_ID, token=HF_TOKEN,
              low_cpu_mem_usage=True, torch_dtype=dtype, trust_remote_code=True,
+             cache_dir=os.environ.get("TRANSFORMERS_CACHE")
          )
      mdl.to("cuda" if torch.cuda.is_available() else "cpu")
      if mdl.config.eos_token_id is None and tok.eos_token_id is not None:

      return mdl, tok
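
The loader combines @lru_cache(maxsize=1) as a lazy singleton with a device_map attempt and a plain retry. A runnable sketch of that shape with an illustrative tiny model (not the app's MODEL_ID; device_map="auto" assumes accelerate is installed). Note the sketch calls .to() only in the fallback branch, since re-moving an accelerate-dispatched model can fail; the committed version calls mdl.to(...) unconditionally, which may be worth a second look:

    from functools import lru_cache

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "sshleifer/tiny-gpt2"  # illustrative stand-in for the app's gated model

    @lru_cache(maxsize=1)  # one shared load; repeat calls return the cached pair
    def load_local_model():
        tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
        try:
            # Preferred path: let accelerate place the weights.
            mdl = AutoModelForCausalLM.from_pretrained(
                MODEL_ID, device_map="auto", low_cpu_mem_usage=True
            )
        except Exception:
            # Fallback: plain load, then move to whatever device exists.
            mdl = AutoModelForCausalLM.from_pretrained(MODEL_ID, low_cpu_mem_usage=True)
            mdl = mdl.to("cuda" if torch.cuda.is_available() else "cpu")
        return mdl, tok

    model, tokenizer = load_local_model()
    assert load_local_model()[0] is model  # cached: same object back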

  def build_inputs(tokenizer, message, history):
+     msgs = [{"role": "system", "content": SYSTEM_MASTER}]
      for u, a in _iter_user_assistant(history):
          if u: msgs.append({"role": "user", "content": u})
          if a: msgs.append({"role": "assistant", "content": a})

      gen_only = out[0, input_ids.shape[-1]:]
      return tokenizer.decode(gen_only, skip_special_tokens=True).strip()
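
The middle of build_inputs and local_generate is elided by the diff; presumably the msgs list is flattened with the tokenizer's chat template. A hedged sketch of that step (the model id is an illustrative stand-in):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # illustrative
    mdl = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

    msgs = [
        {"role": "system", "content": "You are ClarityOps."},
        {"role": "user", "content": "Give me one clarification question."},
    ]

    # add_generation_prompt appends the template's assistant header so the
    # model continues as the assistant instead of echoing the user.
    input_ids = tok.apply_chat_template(msgs, add_generation_prompt=True, return_tensors="pt")

    with torch.no_grad():
        out = mdl.generate(input_ids, max_new_tokens=64)

    # Decode only the newly generated tail, as local_generate does above.
    gen_only = out[0, input_ids.shape[-1]:]
    print(tok.decode(gen_only, skip_special_tokens=True).strip())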

+ # ---------- Snapshot, retriever, RAG ----------
  def _load_snapshot(path=SNAPSHOT_PATH):
      try:
          with open(path, "r", encoding="utf-8") as f:

          "isolation_needs_waiting": {"contact": 3, "airborne": 1}, "telemetry_needed_waiting": 5
      }

  init_retriever()
+ _session_rag = SessionRAG()
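
_load_snapshot's visible shape is a JSON read that falls back to hard-coded defaults on any failure. A self-contained sketch of that defensive-loading pattern (only the two fields shown in the diff are real; the path and anything else are assumptions):

    import json

    DEFAULT_SNAPSHOT = {
        # Only these two fields are visible in the diff; the rest is assumed.
        "isolation_needs_waiting": {"contact": 3, "airborne": 1},
        "telemetry_needed_waiting": 5,
    }

    def load_snapshot(path="snapshot.json"):
        """Return the day's ops snapshot, or a safe default if unreadable."""
        try:
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            return dict(DEFAULT_SNAPSHOT)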

  def _mdsi_block():
      base_capacity = capacity_projection(18, 48, 6)
      cons_capacity = capacity_projection(12, 48, 6)

          "outcomes_summary": outcomes
      }, indent=2)

+ # ---------- Core chat logic (two-phase) ----------
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
      """
      awaiting_answers:
+     - False: Phase 1 -> generate clarification questions and WAIT
+     - True: Phase 2 -> consume clarifications and produce structured analysis
      """
      try:
          log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
 
          ans = "I am ClarityOps, your strategic decision making AI partner."
          return history + [(user_msg, ans)], awaiting_answers

+         # Ingest uploads (text + artifacts like CSV headers)
          if uploaded_files_paths:
              ing = extract_text_from_files(uploaded_files_paths)
              chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])

              _session_rag.register_artifacts(artifacts)
              log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})

+         # Columns helper
          if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
              cols = _session_rag.get_latest_csv_columns()
              if cols:
                  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers

+         # Session retrieval to enrich the system preamble
          session_snips = "\n---\n".join(_session_rag.retrieve(
+             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
              k=6
          ))

          snapshot = _load_snapshot()
          policy_context = retrieve_context(
+             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
          )
          computed = compute_operational_numbers(snapshot)

          user_lower = (safe_in or "").lower()
          mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
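
The ordering above is deliberate: cheap deterministic intents (column/header questions) are answered before any retrieval or model call. A stripped-down sketch of that routing, with a stand-in for the app's SessionRAG:

    import re

    def route_message(text, session_rag):
        """Answer deterministically when possible; None means LLM path."""
        # 1) Deterministic intent: column/header questions answered from the
        #    latest uploaded CSV, no LLM involved.
        if re.search(r"\b(columns?|headers?)\b", (text or "").lower()):
            cols = session_rag.get_latest_csv_columns()
            if cols:
                return "Columns:\n- " + "\n- ".join(cols)
        # 2) Everything else falls through to retrieval + LLM.
        return None

    class FakeRAG:  # minimal stand-in for the app's SessionRAG
        def get_latest_csv_columns(self):
            return ["site", "visits", "a1c_screened"]

    print(route_message("What headers does my file have?", FakeRAG()))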

              session_snips=session_snips
          )

+         # Phase directive
          if not awaiting_answers:
              phase_directive = (
                  "\n\n[INSTRUCTION TO MODEL]\n"

          augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + safe_in + phase_directive

+         # Call LLM
          out = cohere_chat(augmented_user, history)
          if not out:
              model, tokenizer = load_local_model()
              inputs = build_inputs(tokenizer, augmented_user, history)
              out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)

+         # Clean + sanitize
          if isinstance(out, str):
              for tag in ("Assistant:", "System:", "User:"):
                  if out.startswith(tag):
 
          if blocked_out:
              safe_out = refusal_reply(reason_out)

+         # Flip phase state based on headers
          new_awaiting = awaiting_answers
+         low = safe_out.lower()
+         if not awaiting_answers and "clarification questions" in low:
              new_awaiting = True
+         elif awaiting_answers and "structured analysis" in low:
              new_awaiting = False

          log_event("assistant_reply", None, {
              **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
              **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),

          })

          return history + [(user_msg, safe_out)], new_awaiting
+
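
The phase flip keys off marker phrases in the model's own output, so if a reply omits the exact phrases "clarification questions" / "structured analysis", the state silently stays put; worth a close look in review. A tiny model of the state machine (marker strings copied from the diff, the rest illustrative):

    def next_phase(awaiting_answers: bool, reply: str) -> bool:
        """Return the new awaiting_answers flag, as in clarityops_reply."""
        low = (reply or "").lower()
        if not awaiting_answers and "clarification questions" in low:
            return True   # Phase 1 reply asked questions; now wait for answers
        if awaiting_answers and "structured analysis" in low:
            return False  # Phase 2 reply delivered the analysis; reset
        return awaiting_answers  # marker missing: state is unchanged

    assert next_phase(False, "Clarification Questions:\n1. ...") is True
    assert next_phase(True, "Structured Analysis\n...") is False
    assert next_phase(False, "Here is an answer with no marker") is False
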
      except Exception as e:
          err = f"Error: {e}"
          try:

  #chat-container { position: relative; }
  """

+ # ---------- UI ----------
  with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
      gr.Markdown("# ClarityOps Augmented Decision AI")
 

      state_history = gr.State(value=[])
      state_uploaded = gr.State(value=[])
+     state_awaiting = gr.State(value=False)  # False -> Phase 1 next; True -> awaiting answers for Phase 2

      def _store_uploads(files, current):
          paths = []

      uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)

      def _on_send(user_msg, history, up_paths, awaiting):
          hide_overlay_js = gr.update(value='<div id="handshake-overlay" class="hidden"></div>')
          try:
              if not user_msg or not user_msg.strip():
                  return history, "", history, awaiting, hide_overlay_js
+             new_history, new_awaiting = clarityops_reply(
+                 user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting
+             )
              return new_history, "", new_history, new_awaiting, hide_overlay_js
          except Exception as e:
              err = f"Error: {e}"
+             try: traceback.print_exc()
+             except Exception: pass
              new_hist = (history or []) + [(user_msg or "", err)]
              return new_hist, "", new_hist, awaiting, hide_overlay_js
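
_on_send returns five values (chat pairs, cleared textbox, history state, awaiting flag, overlay HTML), so the submit wiring's outputs list must match one-for-one; the diff doesn't show that wiring. A minimal sketch of the same gr.State round-trip (component names are assumptions, tuple-style Chatbot history as the app uses):

    import gradio as gr

    with gr.Blocks() as demo:
        chat = gr.Chatbot()
        msg = gr.Textbox()
        state_awaiting = gr.State(value=False)  # round-trips through every send

        def on_send(user_msg, history, awaiting):
            history = (history or []) + [(user_msg, f"echo (phase2={awaiting})")]
            # Flip the flag each turn just to show State being written back.
            return history, "", not awaiting

        # Outputs must line up 1:1 with on_send's return tuple.
        msg.submit(on_send, inputs=[msg, chat, state_awaiting],
                   outputs=[chat, msg, state_awaiting])

    demo.launch()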

          concurrency_limit=2, queue=True)

      def _on_clear():
          return [], "", [], False, '<div id="handshake-overlay">ClarityOps loaded. Paste your scenario and attach files. I’ll ask up to 5 clarifications, then produce the structured analysis</div>'

      clear.click(_on_clear, None, [chat, msg, state_history, state_awaiting, handshake])