Spaces:

resberry
/

MrCardio

Sleeping

App Files Community

resberry committed on Apr 9

Commit

41aa811

verified ·

1 Parent(s): eb76838

Update app.py

Browse files

Files changed (1) hide show

app.py +1136 -578

app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 import os
 import re
 import time
-import json
-import queue
-import logging
-import threading
 import traceback
-from typing import List, Dict, TypedDict, Optional, Tuple
 from dataclasses import dataclass, field
 import torch
@@ -20,44 +17,47 @@ from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
 # ============================================================
-# AGENTIC ECG CHATBOT
-# - Starts as normal chatbot
-# - Detects ECG / cardiology intent automatically
-# - Retrieves from CSV RAG store only for ECG questions
-# - Runs local ECG adapter reasoning
-# - Runs remote evidence summarizer
-# - Runs remote clinical-composer agent
-# - Merges both into a final long answer
-# - Simple UI with Send / Clear
-# - Visible thinking status + progress logs
 # ============================================================
-raw_omp = str(os.getenv("OMP_NUM_THREADS", "1")).strip()
-os.environ["OMP_NUM_THREADS"] = raw_omp if re.fullmatch(r"\d+", raw_omp) else "1"
-# ============================================================
 # LOGGING
-# ============================================================
 logging.basicConfig(
     level=logging.INFO,
-    format="%(asctime)s | %(levelname)s | %(message)s"
 )
-logger = logging.getLogger("agentic_ecg_chatbot")
-# ============================================================
 # CONFIG
-# ============================================================
 @dataclass
 class Config:
-    base_model_path: str = os.getenv("BASE_MODEL_PATH", "meta-llama/Llama-3.1-8B-Instruct")
-    adapter_dir: str = os.getenv("ADAPTER_DIR", "adapter_refined_v10")
-    data_csv: str = os.getenv("DATA_CSV", "RAGmaterials/ECG_RAG_only_clean.csv")
-    rag_dir: str = os.getenv("RAG_DIR", "RAGmaterials")
     vectorstore_dir: str = field(init=False)
     hf_token: str = os.getenv("HF_TOKEN", "")
@@ -65,41 +65,51 @@ class Config:
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
-    embed_model_name: str = os.getenv("EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
-    similarity_k: int = int(os.getenv("SIMILARITY_K", "10"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
-    max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5500"))
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
-    max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "220"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
-    min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.06"))
-    min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.18"))
-    deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.15"))
-    deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "900"))
-    use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
     enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
-    enable_warmup: bool = os.getenv("ENABLE_WARMUP", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
     def __post_init__(self):
         self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
         os.makedirs(self.rag_dir, exist_ok=True)
         if not self.deepseek_api_key:
-            raise ValueError("Missing DEEPSEEK_API_KEY in environment / Space secrets.")
-        if not self.hf_token:
-            raise ValueError("Missing HF_TOKEN in environment / Space secrets.")
         for path, name in [
             (self.adapter_dir, "Adapter directory"),
@@ -113,38 +123,20 @@ cfg = Config()
 logger.info("Configuration loaded.")
-# ============================================================
 # PROMPTS
-# ============================================================
-INTENT_CLASSIFIER_SYSTEM = """
-You classify user messages.
-Return only one label:
-- ECG_RAG
-- NORMAL_CHAT
-Choose ECG_RAG if the message is about ECG, EKG, cardiology, arrhythmia, heart rhythm, cardiac conduction,
-ST changes, QRS, PR, QT, tachycardia, bradycardia, atrial fibrillation, flutter, bundle branch block,
-heart block, hyperkalemia ECG changes, or similar cardiology interpretation.
-Otherwise return NORMAL_CHAT.
-""".strip()
-QUERY_EXPANSION_SYSTEM = """
-You expand ECG and cardiology retrieval queries.
 Rules:
-1. Preserve the exact user intent.
-2. Add close cardiology / ECG synonyms and alternate wording.
-3. Do not answer the question.
-4. Output only the expanded retrieval query.
-""".strip()
-LOCAL_REASONING_SYSTEM = """
-You are a strict ECG and cardiology reasoning assistant.
-You are not the final answer generator.
-Use only the evidence provided.
-Do not invent facts.
-Output exactly in this format:
 KEY_FINDINGS:
 - ...
@@ -161,102 +153,138 @@ SUPPORTED_POINTS:
 LIMITS:
 - ...
-If evidence is insufficient, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
-RAG_SUMMARY_SYSTEM = """
-You are a clinical evidence summarizer.
-Write a well-structured answer grounded only in the provided evidence and reasoning draft.
-Do not use outside knowledge.
-Be accurate, conservative, and clinically clear.
-Output format:
-### Summary
-4 to 7 full sentences.
-### Key Evidence Points
-4 to 6 bullet points.
-### Clinical Interpretation
-2 to 4 bullet points if supported.
-### Evidence Limits
-State what is not established.
-If the evidence is too weak, output exactly:
-INSUFFICIENT_EVIDENCE
 """.strip()
-CLINICAL_COMPOSER_SYSTEM = """
-You are a second medical composition agent.
-Your job is to produce a longer, polished explanation from the same evidence and the same user question.
-You must stay faithful to the evidence.
-Do not add unsupported facts.
-Do not mention tools, prompts, or pipelines.
-Output format:
-### Direct Answer
-A direct answer in 2 to 3 sentences.
-### Expanded Explanation
-A longer explanation in 5 to 8 sentences.
-### Important Notes
-3 to 5 bullet points.
-### Remaining Uncertainty
-State what the evidence does not prove.
-If the evidence is too weak, output exactly:
 INSUFFICIENT_EVIDENCE
-""".strip()
-FINAL_MERGER_SYSTEM = """
-You are the final answer agent.
-You will receive:
-1. the user's question
-2. retrieved evidence
-3. a local ECG adapter reasoning draft
-4. summary agent output
-5. clinical composer output
-Write one final long-form answer.
-Rules:
-- Use only supported information.
-- Merge overlapping ideas cleanly.
-- Do not repeat the same point too many times.
-- Make the answer helpful, detailed, and readable.
-- Do not mention internal agents or processing steps.
-Output format:
-### Final Answer
-A detailed answer in 6 to 10 sentences.
-### Key Points
-4 to 6 bullets.
-### Clinical Perspective
-2 to 4 bullets if supported.
-### Limits
-A short honest limitations section.
-If evidence is weak, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
-NORMAL_CHAT_SYSTEM = """
-You are a helpful, friendly chatbot.
-Be conversational, clear, and useful.
-Answer normally.
-Do not mention hidden tools or internal systems.
 """.strip()
-# ============================================================
 # HELPERS
-# ============================================================
 def clean_text(x: str) -> str:
     x = str(x).replace("\x00", " ").strip()
     x = re.sub(r"\s+", " ", x)
@@ -265,6 +293,20 @@ def clean_text(x: str) -> str:
 def strip_bad_sections(txt: str) -> str:
     t = str(txt).strip()
     t = re.sub(r"https?://\S+|www\.\S+", "", t).strip()
     return t
@@ -272,16 +314,22 @@ def strip_bad_sections(txt: str) -> str:
 def infer_tags(question: str, answer: str) -> List[str]:
     text = f"{question} {answer}".lower()
     tags: List[str] = []
     keyword_map = {
-        "ecg": ["ecg", "ekg", "qrs", "pr", "qt", "st elevation", "t wave", "arrhythmia", "tachycardia", "bradycardia"],
         "diagnosis": ["diagnosis", "diagnose", "criteria"],
-        "treatment": ["treat", "therapy", "management", "drug"],
-        "symptoms": ["symptom", "sign", "presentation"],
-        "etiology": ["cause", "caused by", "associated with", "risk factor"],
     }
     for tag, words in keyword_map.items():
         if any(w in text for w in words):
             tags.append(tag)
     return tags
@@ -305,95 +353,110 @@ def lexical_overlap(query: str, text: str) -> float:
     return len(q_words & t_words) / max(1, len(q_words))
 def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
-    max_turns = max_turns or cfg.max_chat_history_turns
     items = chat_history[-max_turns:]
     if not items:
         return "[EMPTY]"
     return "\n".join([f"{m['role'].upper()}: {m['content']}" for m in items]).strip()
 def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
-    max_chars = max_chars or cfg.max_context_chars
     blocks = []
     total = 0
     for i, d in enumerate(docs, 1):
         q = d.metadata.get("question", "")
         a = d.metadata.get("answer", "")
         tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
-        sim = d.metadata.get("sim_score", "N/A")
         block = f"""
 ==============================
 EVIDENCE_ID: {i}
 SOURCE_ID: {d.metadata.get('id')}
 SOURCE_QUESTION: {q}
 SOURCE_TAGS: {tags}
-SIMILARITY: {sim}
 EVIDENCE_TEXT:
 {a}
 ==============================
 """.strip()
         if total + len(block) > max_chars:
             break
         blocks.append(block)
         total += len(block) + 2
-    return "\n\n".join(blocks).strip()
-def stream_text(text: str, step: int = 120):
-    acc = ""
-    for i in range(0, len(text), step):
-        acc += text[i:i + step]
-        yield acc
-# ============================================================
-# PROGRESS / LOGGING
-# ============================================================
-def new_progress_state() -> Dict:
-    return {"lines": []}
-def add_progress(progress_state: Dict, msg: str):
-    line = f"[{time.strftime('%H:%M:%S')}] {msg}"
-    logger.info(msg)
-    progress_state["lines"].append(line)
-    progress_state["lines"] = progress_state["lines"][-80:]
-def progress_text(progress_state: Dict) -> str:
-    lines = progress_state.get("lines", [])
-    return "\n".join(lines) if lines else "No progress yet."
-# ============================================================
-# ECG QUERY DETECTION
-# ============================================================
-ECG_REGEXES = [
-    r"\becg\b", r"\bekg\b", r"\bcardiology\b", r"\barrhythmia\b", r"\bheart rhythm\b",
-    r"\batrial fibrillation\b", r"\bafib\b", r"\bflutter\b", r"\bqrs\b", r"\bpr interval\b",
-    r"\bqt\b", r"\bst elevation\b", r"\bst depression\b", r"\bt wave\b", r"\bbradycardia\b",
-    r"\btachycardia\b", r"\bheart block\b", r"\bbundle branch block\b", r"\bhyperkalemia\b",
-]
-def detect_ecg_by_rules(text: str) -> bool:
-    text = str(text or "").lower().strip()
-    return any(re.search(p, text) for p in ECG_REGEXES)
-# ============================================================
 # EMBEDDINGS + VECTORSTORE
-# ============================================================
 logger.info("Loading embeddings...")
-embeddings = HuggingFaceEmbeddings(
-    model_name=cfg.embed_model_name,
-    model_kwargs={
-        "device": "cuda" if torch.cuda.is_available() else "cpu",
-        "token": cfg.hf_token if cfg.hf_token else None,
-    },
-    encode_kwargs={"normalize_embeddings": True},
-)
 def build_vectorstore():
@@ -421,7 +484,7 @@ def build_vectorstore():
                     "question": q,
                     "answer": a,
                     "tags": infer_tags(q, a),
-                },
             )
         )
@@ -446,15 +509,16 @@ vectorstore = load_vectorstore()
 logger.info("Vectorstore ready.")
-# ============================================================
-# MODEL LOADING
-# ============================================================
 logger.info("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     cfg.base_model_path,
     use_fast=True,
-    token=cfg.hf_token if cfg.hf_token else None,
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
@@ -495,19 +559,10 @@ if base_model is None:
 base_model.eval()
-logger.info("Loading ECG adapter...")
 reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
 reason_model.eval()
-logger.info("Loading remote LLM client...")
-remote_llm = ChatOpenAI(
-    model=cfg.deepseek_model,
-    api_key=cfg.deepseek_api_key,
-    base_url=cfg.deepseek_base_url,
-    temperature=cfg.deepseek_temperature,
-    max_tokens=cfg.deepseek_max_tokens,
-)
 def get_primary_model_device(model) -> torch.device:
     try:
@@ -516,50 +571,15 @@ def get_primary_model_device(model) -> torch.device:
         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# ============================================================
-# LLM CALLS
-# ============================================================
-def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
-    try:
-        resp = remote_llm.invoke([
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ])
-        text = resp.content if hasattr(resp, "content") else str(resp)
-        text = strip_bad_sections(text)
-        return text if text.strip() else fallback
-    except Exception as e:
-        logger.error(f"Remote LLM error: {e}")
-        traceback.print_exc()
-        return fallback
-def classify_intent(user_query: str) -> str:
-    if detect_ecg_by_rules(user_query):
-        return "ECG_RAG"
-    result = llm_text(
-        INTENT_CLASSIFIER_SYSTEM,
-        f"USER_MESSAGE:\n{user_query}",
-        fallback="NORMAL_CHAT",
-    ).strip().upper()
-    return "ECG_RAG" if "ECG_RAG" in result else "NORMAL_CHAT"
-def run_query_expansion(user_query: str) -> str:
-    if not cfg.enable_query_expansion:
-        return user_query
-    prompt = f"USER_QUERY:\n{user_query}\n\nExpand this for ECG/cardiology retrieval."
-    expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
-    return expanded.strip() if expanded else user_query
 @torch.inference_mode()
 def run_local_reasoner(user_query: str, context: str) -> str:
     try:
         messages = [
             {"role": "system", "content": LOCAL_REASONING_SYSTEM},
-            {"role": "user", "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context or '[EMPTY]'}"},
         ]
         prompt = tokenizer.apply_chat_template(
@@ -591,81 +611,117 @@ def run_local_reasoner(user_query: str, context: str) -> str:
         gen_ids = out[0, inputs["input_ids"].shape[1]:]
         text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
-        return strip_bad_sections(text) or "INSUFFICIENT_EVIDENCE"
     except Exception as e:
         logger.error(f"Local reasoner error: {e}")
         traceback.print_exc()
         return "INSUFFICIENT_EVIDENCE"
-def run_rag_summary(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
-    prompt = f"""
-CHAT_HISTORY:
-{history_to_text(chat_history)}
-USER_QUESTION:
-{user_query}
-RETRIEVED_EVIDENCE:
-{context if context.strip() else '[EMPTY]'}
-LOCAL_REASONING_DRAFT:
-{reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
-""".strip()
-    return llm_text(RAG_SUMMARY_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
-def run_clinical_composer(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
-    prompt = f"""
-CHAT_HISTORY:
-{history_to_text(chat_history)}
-USER_QUESTION:
-{user_query}
-RETRIEVED_EVIDENCE:
-{context if context.strip() else '[EMPTY]'}
-LOCAL_REASONING_DRAFT:
-{reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
 """.strip()
-    return llm_text(CLINICAL_COMPOSER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
-def run_final_merger(user_query: str, context: str, reasoning_draft: str, summary_a: str, summary_b: str) -> str:
     prompt = f"""
 USER_QUESTION:
 {user_query}
 RETRIEVED_EVIDENCE:
 {context if context.strip() else '[EMPTY]'}
-LOCAL_ECG_REASONING:
 {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
-SUMMARY_AGENT_OUTPUT:
-{summary_a if summary_a.strip() else '[EMPTY]'}
-CLINICAL_COMPOSER_OUTPUT:
-{summary_b if summary_b.strip() else '[EMPTY]'}
 """.strip()
-    return llm_text(FINAL_MERGER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
-def run_normal_chat(user_query: str, chat_history: List[Dict[str, str]]) -> str:
     prompt = f"""
-CHAT_HISTORY:
-{history_to_text(chat_history)}
-USER_MESSAGE:
-{user_query}
 """.strip()
-    return llm_text(NORMAL_CHAT_SYSTEM, prompt, fallback="Sorry, I could not generate a response.")
-# ============================================================
 # WARMUP
-# ============================================================
 def warmup_models():
     logger.info("Warming up local reasoner...")
     try:
@@ -676,7 +732,6 @@ def warmup_models():
 EVIDENCE_ID: 1
 SOURCE_QUESTION: What are ECG findings in hyperkalemia?
 SOURCE_TAGS: ecg
-SIMILARITY: 0.9
 EVIDENCE_TEXT:
 Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
 ==============================
@@ -687,58 +742,38 @@ Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe
         logger.warning(f"Warmup failed: {e}")
-if cfg.enable_warmup:
-    warmup_models()
-# ============================================================
 # STATE
-# ============================================================
-class AgentState(TypedDict, total=False):
     user_query: str
-    chat_history: List[Dict[str, str]]
-    detected_mode: str
     expanded_query: str
     retrieved_docs: List[Document]
     best_score: float
     context: str
-    local_reasoning: str
-    summary_agent: str
-    composer_agent: str
     final_answer: str
-# ============================================================
 # RETRIEVAL
-# ============================================================
-def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
-    top_n = top_n or cfg.top_k_final
-    q_words = set(re.findall(r"\w+", query.lower()))
-    scored = []
-    for d in docs:
-        question = d.metadata.get("question", "")
-        answer = d.metadata.get("answer", "")
-        tags = " ".join(d.metadata.get("tags", []))
-        text = f"{question} {answer} {tags}".lower()
-        t_words = set(re.findall(r"\w+", text))
-        overlap = len(q_words & t_words) / max(1, len(q_words))
-        question_boost = 0.20 if any(w in question.lower() for w in q_words) else 0.0
-        tag_boost = 0.10 if any(w in tags.lower() for w in q_words) else 0.0
-        sim_score = float(d.metadata.get("sim_score", 0.0))
-        final_score = overlap + question_boost + tag_boost + (0.35 * sim_score)
-        scored.append((d, final_score))
-    scored.sort(key=lambda x: x[1], reverse=True)
-    return [d for d, _ in scored[:top_n]]
-def retrieve_docs_once(query_for_search: str, original_query: str) -> Tuple[List[Document], float]:
     try:
-        scored = vectorstore.similarity_search_with_score(query_for_search, k=cfg.similarity_k)
     except Exception as e:
         logger.error(f"Retriever error: {e}")
         traceback.print_exc()
@@ -748,216 +783,493 @@ def retrieve_docs_once(query_for_search: str, original_query: str) -> Tuple[List
         return [], -1.0
     filtered_docs = []
     for doc, raw_score in scored:
         sim = score_to_similarity(raw_score)
         q = doc.metadata.get("question", "")
         a = doc.metadata.get("answer", "")
         ov = lexical_overlap(original_query, f"{q} {a}")
-        if sim >= 0.45 or (ov >= cfg.min_lexical_overlap and sim >= cfg.min_faiss_similarity):
             new_doc = Document(page_content=doc.page_content, metadata=dict(doc.metadata))
             new_doc.metadata["sim_score"] = sim
             new_doc.metadata["lexical_overlap"] = ov
             filtered_docs.append(new_doc)
     reranked = rerank_docs(original_query, filtered_docs, top_n=cfg.top_k_final)
-    best_score = max((float(d.metadata.get("sim_score", -1.0)) for d in reranked), default=-1.0)
     return reranked, best_score
-def retrieve_docs(query: str) -> Tuple[List[Document], float, str]:
-    docs_a, score_a = retrieve_docs_once(query, query)
     if not cfg.enable_query_expansion:
-        return docs_a, score_a, query
-    expanded = run_query_expansion(query)
-    docs_b, score_b = retrieve_docs_once(expanded, query)
-    merged = []
-    seen_ids = set()
-    for d in docs_a + docs_b:
-        doc_id = d.metadata.get("id")
-        if doc_id not in seen_ids:
-            seen_ids.add(doc_id)
-            merged.append(d)
-    merged = rerank_docs(query, merged, top_n=cfg.top_k_final)
-    best_score = max(score_a, score_b)
-    return merged, best_score, expanded
-# ============================================================
-# CORE AGENTIC PIPELINE
-# ============================================================
-def initialize_session() -> Dict:
-    return {
-        "chat_history": [],
-        "last_result": None,
-        "progress": new_progress_state(),
-    }
-def run_agentic_turn(user_query: str, session_state: Dict) -> Dict:
-    if session_state is None:
-        session_state = initialize_session()
-    progress = new_progress_state()
-    add_progress(progress, "User message received")
-    chat_history = session_state.get("chat_history", [])
-    add_progress(progress, "Detecting query type")
-    mode = classify_intent(user_query)
-    add_progress(progress, f"Detected mode: {mode}")
-    if mode == "NORMAL_CHAT":
-        add_progress(progress, "Running normal chat response")
-        answer = run_normal_chat(user_query, chat_history)
-        result = {
-            "mode": "normal_chat",
-            "final_answer": answer,
-            "retrieved_docs": [],
-            "best_score": -1.0,
-            "context": "",
-            "local_reasoning": "",
-            "summary_agent": "",
-            "composer_agent": "",
-            "progress_text": progress_text(progress),
         }
-    else:
-        add_progress(progress, "Running ECG retrieval")
-        docs, best_score, expanded_query = retrieve_docs(user_query)
-        add_progress(progress, f"Retrieved {len(docs)} document(s)")
-        add_progress(progress, f"Best score: {best_score:.3f}")
-        add_progress(progress, f"Expanded query: {expanded_query}")
-        context = build_context_string(docs)
-        if not context.strip():
-            add_progress(progress, "No strong ECG evidence found")
-            answer = "I could not find sufficiently relevant ECG evidence in the CSV knowledge base for this question."
-            result = {
-                "mode": "ecg_rag",
-                "final_answer": answer,
-                "retrieved_docs": docs,
-                "best_score": best_score,
-                "context": context,
-                "local_reasoning": "",
-                "summary_agent": "",
-                "composer_agent": "",
-                "progress_text": progress_text(progress),
-            }
-        else:
-            add_progress(progress, "Running local ECG adapter reasoning")
-            local_reasoning = run_local_reasoner(user_query, context)
-            add_progress(progress, "Running summary agent")
-            summary_agent = run_rag_summary(user_query, context, local_reasoning, chat_history)
-            add_progress(progress, "Running clinical composer agent")
-            composer_agent = run_clinical_composer(user_query, context, local_reasoning, chat_history)
-            add_progress(progress, "Running final merger agent")
-            final_answer = run_final_merger(user_query, context, local_reasoning, summary_agent, composer_agent)
-            if not final_answer.strip() or final_answer.strip() == "INSUFFICIENT_EVIDENCE":
-                final_answer = summary_agent if summary_agent.strip() else "INSUFFICIENT_EVIDENCE"
-            add_progress(progress, "Final answer ready")
-            result = {
-                "mode": "ecg_rag",
-                "final_answer": final_answer,
-                "retrieved_docs": docs,
-                "best_score": best_score,
-                "context": context,
-                "local_reasoning": local_reasoning,
-                "summary_agent": summary_agent,
-                "composer_agent": composer_agent,
-                "progress_text": progress_text(progress),
-            }
-    session_state["chat_history"].append({"role": "user", "content": user_query})
-    session_state["chat_history"].append({"role": "assistant", "content": result["final_answer"]})
-    session_state["chat_history"] = session_state["chat_history"][-12:]
-    session_state["last_result"] = result
-    session_state["progress"] = progress
-    return {"result": result, "session_state": session_state}
-# ============================================================
 # UI HELPERS
-# ============================================================
 CUSTOM_CSS = """
 html, body, .gradio-container {
     margin: 0 !important;
     padding: 0 !important;
-    background: #0b1220;
-    color: #e5e7eb;
 }
 .gradio-container {
-    max-width: 900px !important;
-    margin: 0 auto !important;
-    padding: 16px !important;
 }
-.simple-card {
-    border: 1px solid rgba(255,255,255,0.08);
-    background: #111827;
-    border-radius: 18px;
     padding: 16px;
     margin-bottom: 12px;
 }
-.app-title {
-    font-size: 1.4rem;
     font-weight: 800;
-    color: #f9fafb;
     margin-bottom: 6px;
 }
-.app-subtitle {
-    font-size: 0.95rem;
     color: #cbd5e1;
 }
 #chatbot {
-    min-height: 60vh !important;
     border-radius: 18px !important;
 }
-.status-box {
-    border: 1px solid rgba(255,255,255,0.08);
-    background: linear-gradient(180deg, #111827 0%, #172033 100%);
-    border-radius: 16px;
     padding: 12px 14px;
-    color: #f3f4f6;
 }
-.thinking-dots {
-    display: inline-block;
-    letter-spacing: 4px;
     font-weight: 800;
     animation: blinkDots 1s steps(1, end) infinite;
 }
 @keyframes blinkDots {
     0% { opacity: 1; }
-    50% { opacity: 0.2; }
     100% { opacity: 1; }
 }
 textarea, .gr-textbox textarea {
-    border-radius: 14px !important;
 }
 button {
     border-radius: 14px !important;
     min-height: 44px !important;
     font-weight: 600 !important;
 }
 """
-def header_html() -> str:
     return """
-    <div class="simple-card">
-        <div class="app-title">🫀 Agentic ECG Chatbot</div>
-        <div class="app-subtitle">
-            Starts as normal chat. If the question is ECG/cardiology-related, it automatically switches into ECG evidence mode,
-            retrieves from your CSV knowledge base, runs local ECG adapter reasoning, builds two summaries, and merges them into one long final answer.
         </div>
     </div>
     """
@@ -965,160 +1277,398 @@ def header_html() -> str:
 def thinking_html(stage: str) -> str:
     return f"""
-    <div class="status-box">
-        <b>{stage}</b><br>
-        Model is thinking <span class="thinking-dots">...</span>
     </div>
     """
-def add_assistant_placeholder(history, text="Thinking..."):
     history = history or []
-    history.append({"role": "assistant", "content": text, "metadata": {"title": "Thinking"}})
     return history
-def update_last_assistant_message(history, text, title="Answer"):
     history = history or []
     if not history or history[-1]["role"] != "assistant":
-        history.append({"role": "assistant", "content": text, "metadata": {"title": title}})
         return history
-    history[-1] = {"role": "assistant", "content": text, "metadata": {"title": title}}
     return history
-def user_submit(user_message, chat_history):
-    chat_history = chat_history or []
     user_message = (user_message or "").strip()
     if not user_message:
-        return "", chat_history
-    chat_history.append({"role": "user", "content": user_message})
-    return "", chat_history
-def format_sources(result: Optional[Dict]) -> str:
-    if not result:
-        return "No sources yet."
-    docs = result.get("retrieved_docs", [])
-    if not docs:
-        return "No ECG retrieval used for the last answer."
-    lines = [f"Best score: {result.get('best_score', -1.0):.3f}", ""]
-    for i, d in enumerate(docs, 1):
-        q = d.metadata.get("question", "")
-        a = d.metadata.get("answer", "")
-        sim = d.metadata.get("sim_score", "N/A")
-        preview = a[:220] + ("..." if len(a) > 220 else "")
-        lines += [
-            f"Evidence {i}",
-            f"- Question: {q}",
-            f"- Similarity: {sim}",
-            f"- Preview: {preview}",
-            "",
-        ]
-    return "\n".join(lines).strip()
-def clear_chat():
-    st = initialize_session()
-    return [], st, "", "No progress yet.", "No sources yet."
-def rebuild_store(session_state, chat_history):
-    global vectorstore
-    if not cfg.allow_rebuild_vectorstore:
-        chat_history = chat_history or []
-        chat_history.append({"role": "assistant", "content": "Vector store rebuild is disabled.", "metadata": {"title": "Restricted"}})
-        return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
-    build_vectorstore()
-    vectorstore = load_vectorstore()
-    chat_history = chat_history or []
-    chat_history.append({"role": "assistant", "content": "✅ Vector store rebuilt.", "metadata": {"title": "Done"}})
-    return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
-# ============================================================
-# STREAMING RESPONSE
-# ============================================================
-def bot_respond_stream(chat_history, session_state):
     if session_state is None:
         session_state = initialize_session()
-    if not chat_history:
-        yield chat_history, session_state, "", "No progress yet.", "No sources yet."
         return
-    user_message = str(chat_history[-1]["content"]).strip()
-    chat_history = add_assistant_placeholder(chat_history, "Thinking...")
-    yield chat_history, session_state, thinking_html("Understanding your message"), "Starting...", ""
-    time.sleep(0.4)
-    yield chat_history, session_state, thinking_html("Detecting whether this is normal chat or ECG reasoning"), "Detecting intent...", ""
-    time.sleep(0.4)
-    detected = classify_intent(user_message)
-    if detected == "NORMAL_CHAT":
-        yield chat_history, session_state, thinking_html("Normal chatbot mode active"), "Running normal chat...", ""
-        time.sleep(0.4)
-    else:
-        yield chat_history, session_state, thinking_html("ECG mode detected: retrieving evidence"), "Retrieving ECG evidence...", ""
-        time.sleep(0.45)
-        yield chat_history, session_state, thinking_html("Running local ECG adapter reasoning"), "Running local reasoning...", ""
-        time.sleep(0.45)
-        yield chat_history, session_state, thinking_html("Generating multiple summaries and composing final answer"), "Generating final answer...", ""
-        time.sleep(0.45)
-    out = run_agentic_turn(user_message, session_state)
-    result = out["result"]
-    updated_session = out["session_state"]
-    answer = result.get("final_answer", "I could not generate an answer.")
-    sources = format_sources(result)
-    prog = result.get("progress_text", "No progress yet.")
     if cfg.enable_typewriter_stream:
-        for partial in stream_text(answer, step=140):
-            chat_history = update_last_assistant_message(chat_history, partial, title="Answer")
-            yield chat_history, updated_session, "", prog, sources
-    chat_history = update_last_assistant_message(chat_history, answer, title="Answer")
-    yield chat_history, updated_session, "", prog, sources
-# ============================================================
 # APP
-# ============================================================
-with gr.Blocks(title="Agentic ECG Chatbot", css=CUSTOM_CSS) as demo:
-    gr.HTML(header_html())
     session_state = gr.State(initialize_session())
-    chatbot = gr.Chatbot(
-        label="Chat",
-        elem_id="chatbot",
-        type="messages",
-        show_copy_button=True,
-        bubble_full_width=False,
-    )
-    user_box = gr.Textbox(
-        label="Message",
-        placeholder="Ask anything. ECG / cardiology questions are detected automatically.",
-        lines=2,
-        autofocus=True,
-    )
-    status_html = gr.HTML("")
-    with gr.Row():
-        send_btn = gr.Button("Submit", variant="primary")
-        clear_btn = gr.Button("Clear")
-    with gr.Accordion("Progress Log", open=False):
-        progress_panel = gr.Textbox(value="No progress yet.", lines=16, interactive=False)
-    with gr.Accordion("Retrieved ECG Sources", open=False):
-        sources_panel = gr.Textbox(value="No sources yet.", lines=16, interactive=False)
     submit_event = user_box.submit(
         fn=user_submit,
@@ -1126,33 +1676,41 @@ with gr.Blocks(title="Agentic ECG Chatbot", css=CUSTOM_CSS) as demo:
         outputs=[user_box, chatbot],
         queue=True,
     )
     submit_event.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
-        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=True,
     )
-    send_event = send_btn.click(
         fn=user_submit,
         inputs=[user_box, chatbot],
         outputs=[user_box, chatbot],
         queue=True,
     )
-    send_event.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
-        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=True,
     )
     clear_btn.click(
         fn=clear_chat,
         inputs=[],
-        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=False,
     )
 demo.queue(default_concurrency_limit=1)
@@ -1161,4 +1719,4 @@ if __name__ == "__main__":
         debug=cfg.launch_debug,
         server_name=cfg.server_name,
         server_port=cfg.server_port,
-    )

 import os
 import re
 import time
 import traceback
+import logging
+from typing import List, Dict, TypedDict, Optional
 from dataclasses import dataclass, field
 import torch
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
+from langgraph.graph import StateGraph, START, END
 # ============================================================
+# HUGGING FACE SPACES READY
+# Medical CSV RAG Chatbot
+# Mobile-friendly UI/UX version
+# Pipeline: RAG retrieval -> local ECG adapter reasoning -> grounded summary
 # ============================================================
+# -------------------------------
 # LOGGING
+# -------------------------------
 logging.basicConfig(
     level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
 )
+logger = logging.getLogger(__name__)
+# -------------------------------
 # CONFIG
+# -------------------------------
 @dataclass
 class Config:
+    base_model_path: str = os.getenv(
+        "BASE_MODEL_PATH",
+        "meta-llama/Llama-3.1-8B-Instruct"
+    )
+    adapter_dir: str = os.getenv(
+        "ADAPTER_DIR",
+        "adapter_refined_v10"
+    )
+    data_csv: str = os.getenv(
+        "DATA_CSV",
+        "RAGmaterials/ECG_RAG_only_clean.csv"
+    )
+    rag_dir: str = os.getenv(
+        "RAG_DIR",
+        "RAGmaterials"
+    )
     vectorstore_dir: str = field(init=False)
     hf_token: str = os.getenv("HF_TOKEN", "")
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
+    deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.1"))
+    deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "550"))
+    embed_model_name: str = os.getenv(
+        "EMBED_MODEL_NAME",
+        "sentence-transformers/all-MiniLM-L6-v2"
+    )
+    similarity_k: int = int(os.getenv("SIMILARITY_K", "12"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
+    max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5200"))
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
+    max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "180"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
+    min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.08"))
+    min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.20"))
+    strong_retrieval_threshold: float = float(os.getenv("STRONG_RETRIEVAL_THRESHOLD", "0.30"))
+    strong_retrieval_min_docs: int = int(os.getenv("STRONG_RETRIEVAL_MIN_DOCS", "3"))
+    use_query_cache: bool = os.getenv("USE_QUERY_CACHE", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
+    enable_validator: bool = os.getenv("ENABLE_VALIDATOR", "true").lower() == "true"
     enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
+    show_debug_panel: bool = os.getenv("SHOW_DEBUG_PANEL", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
+    use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
+    blink_stage_1: float = float(os.getenv("BLINK_STAGE_1", "0.40"))
+    blink_stage_2: float = float(os.getenv("BLINK_STAGE_2", "0.55"))
+    blink_stage_3: float = float(os.getenv("BLINK_STAGE_3", "0.50"))
+    blink_before_answer: float = float(os.getenv("BLINK_BEFORE_ANSWER", "0.25"))
     def __post_init__(self):
         self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
         os.makedirs(self.rag_dir, exist_ok=True)
         if not self.deepseek_api_key:
+            raise ValueError("Missing DEEPSEEK_API_KEY. Add it in Hugging Face Space Secrets.")
         for path, name in [
             (self.adapter_dir, "Adapter directory"),
 logger.info("Configuration loaded.")
+# -------------------------------
 # PROMPTS
+# -------------------------------
+LOCAL_REASONING_SYSTEM = """
+You are a strict medical reasoning assistant specialized for ECG and cardiology reasoning.
+You are NOT the final answer generator.
+You must analyze ONLY the supplied evidence and produce a short structured reasoning draft.
 Rules:
+1) Use only the provided evidence.
+2) Do not invent facts.
+3) Focus only on the user's exact question.
+4) Output exactly in this structure:
 KEY_FINDINGS:
 - ...
 LIMITS:
 - ...
+5) If evidence is insufficient, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
+QUERY_EXPANSION_SYSTEM = """
+You expand medical queries for retrieval.
+Rules:
+1) Preserve the user's intent.
+2) Add close medical paraphrases and alternate wording.
+3) Add likely medical synonyms, abbreviations, and alternate phrasing.
+4) Do not answer the question.
+5) Output only the expanded retrieval query.
 """.strip()
+DEEPSEEK_SUMMARY_SYSTEM = """
+You are an expert medical evidence summarizer.
+Your job is to produce a clinically precise, well-structured answer grounded ONLY in:
+1. the retrieved evidence
+2. the local reasoning draft
+You must be faithful to the provided material and answer the user's question directly, clearly, and conservatively.
+PRIMARY OBJECTIVE
+- Identify the user's main intent before writing:
+  definition, cause, symptoms, diagnosis, investigation, treatment, prognosis, or genetics.
+- Prioritize that intent throughout the response.
+- The first sentence of the Summary must directly answer the user's question in the most clinically relevant way.
+GROUNDING RULES
+- Use only information supported by the retrieved evidence and local reasoning draft.
+- Do not add outside medical knowledge.
+- Do not infer specific facts unless they are clearly supported.
+- Do not invent treatments, diagnoses, risks, mechanisms, thresholds, statistics, timelines, monitoring plans, or prognosis details.
+- If the evidence is incomplete, be explicit about what is missing.
+- If the evidence is too weak to answer the question reliably, output exactly:
 INSUFFICIENT_EVIDENCE
+STYLE RULES
+- Write in precise, professional clinical language.
+- Be specific, not vague.
+- Be concise, but fully informative.
+- Avoid repetition, generic filler, and empty statements.
+- Do not mention retrieval, prompts, system instructions, reasoning drafts, tools, pipelines, or internal processes.
+- Do not include URLs or citations unless explicitly requested elsewhere.
+- Do not overstate certainty.
+- When appropriate, distinguish clearly between what is established, what is suggested, and what is not addressed by the evidence.
+OUTPUT FORMAT
+### Summary
+- Write 4 to 7 full sentences.
+- This is the most important section.
+- The first sentence must directly answer the user's question.
+- Focus primarily on the user's main intent.
+- Include only background information that improves understanding of the requested topic.
+- Make the summary clinically useful, specific, and evidence-faithful.
+### Key Evidence Points
+- Include 4 to 6 bullet points.
+- Each bullet must state a concrete fact supported by the evidence.
+- Prioritize clinically important facts over background detail.
+- Avoid repeating the same idea in different words.
+### Clinical Implications / Recommendations
+- Include 2 to 4 bullet points only if supported by the evidence.
+- Focus on practical interpretation, management implications, follow-up considerations, or next steps.
+- If the evidence supports recognition or framing rather than action, say that clearly.
+- Do not recommend interventions not supported by the evidence.
+### Limitations of the Evidence
+- State clearly what the evidence does not establish, does not cover, or leaves uncertain.
+- Explicitly note when details are lacking on:
+  treatment, diagnosis, prognosis, genetics, monitoring, recurrence prevention, comparative effectiveness, or long-term outcomes.
+- If the evidence is narrow, low-detail, or only partially aligned with the question, say so plainly.
+SPECIAL INSTRUCTIONS BY QUESTION TYPE
+For treatment questions:
+- Focus primarily on treatment and management, not disease definition.
+- Organize treatment information in this order whenever supported by the evidence:
+  1. supportive or conservative care
+  2. symptomatic drug therapy or procedural treatment
+  3. long-term prevention, follow-up, or recurrence prevention
+- Distinguish treatment of active symptoms from prevention of recurrence or complications.
+- If the condition is benign, self-limited, or often does not require treatment, state that clearly in the first sentence.
+For diagnosis or investigation questions:
+- Focus on how the condition is identified, evaluated, or differentiated.
+- Prioritize diagnostic features, testing approach, and clinically useful distinctions.
+- Do not drift into treatment unless the evidence clearly supports it and it helps answer the question.
+For cause or risk questions:
+- Focus on etiologies, risk factors, mechanisms, or associations supported by the evidence.
+- Distinguish established causes from possible contributors if the evidence is less certain.
+For prognosis questions:
+- Focus on expected course, complications, recurrence, or outcome-related information supported by the evidence.
+- Do not add prognostic claims not explicitly supported.
+QUALITY CHECK BEFORE OUTPUT
+Before finalizing, ensure that:
+- the first sentence directly answers the question
+- the response matches the user's primary intent
+- every important claim is grounded in the provided material
+- no unsupported medical detail has been added
+- the Limitations section honestly reflects evidence gaps
+If these conditions cannot be met, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
+VALIDATOR_SYSTEM = """
+You are a strict medical evidence validator.
+Your job is to compare the ANSWER against the EVIDENCE.
+Rules:
+1) Mark SUPPORTED if the answer is well grounded in the evidence.
+2) Mark PARTLY_UNSUPPORTED if some claims are supported but others go beyond the evidence.
+3) Mark INSUFFICIENT_EVIDENCE if the answer is mostly unsupported or the evidence is too weak.
+4) Output only one short verdict line beginning with exactly one of:
+SUPPORTED:
+PARTLY_UNSUPPORTED:
+INSUFFICIENT_EVIDENCE:
 """.strip()
+# -------------------------------
 # HELPERS
+# -------------------------------
 def clean_text(x: str) -> str:
     x = str(x).replace("\x00", " ").strip()
     x = re.sub(r"\s+", " ", x)
 def strip_bad_sections(txt: str) -> str:
     t = str(txt).strip()
+    cut_markers = [
+        "References:",
+        "Sources:",
+        "Source:",
+        "URLs:",
+        "This response is based",
+        "Please let me know",
+        "Is there anything else",
+    ]
+    for marker in cut_markers:
+        pos = t.lower().find(marker.lower())
+        if pos != -1:
+            t = t[:pos].strip()
     t = re.sub(r"https?://\S+|www\.\S+", "", t).strip()
     return t
 def infer_tags(question: str, answer: str) -> List[str]:
     text = f"{question} {answer}".lower()
     tags: List[str] = []
     keyword_map = {
+        "treatment": ["treat", "therapy", "management", "drug", "surgery"],
         "diagnosis": ["diagnosis", "diagnose", "criteria"],
+        "symptoms": ["symptom", "presentation", "sign", "feature"],
+        "ecg": ["ecg", "ekg", "st elevation", "qrs", "p wave", "arrhythmia", "tachycardia", "bradycardia"],
+        "investigation": ["test", "investigation", "mri", "ct", "lab", "imaging"],
+        "prognosis": ["prognosis", "outcome", "survival", "risk"],
+        "genetics": ["gene", "genetic", "mutation", "variant", "chromosome", "inherited", "inheritance"],
+        "etiology": ["cause", "causes", "caused by", "associated with", "risk factor"],
     }
     for tag, words in keyword_map.items():
         if any(w in text for w in words):
             tags.append(tag)
     return tags
     return len(q_words & t_words) / max(1, len(q_words))
def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
    """Order retrieved documents by a blended lexical/vector score.

    The score is the fraction of query tokens found in the document's
    question, answer and tags, plus a 0.20 boost when any query token occurs
    in the question text, a 0.10 boost when any occurs in the tags, and 0.35
    times the stored ``sim_score`` metadata value.

    Args:
        query: The user's query.
        docs: Candidate documents carrying question/answer/tags metadata.
        top_n: Number of documents to keep; defaults to ``cfg.top_k_final``.

    Returns:
        The highest-scoring documents, best first.
    """
    limit = cfg.top_k_final if top_n is None else top_n
    query_tokens = set(re.findall(r"\w+", query.lower()))

    def _score(doc) -> float:
        meta = doc.metadata
        question = meta.get("question", "")
        answer = meta.get("answer", "")
        tags = " ".join(meta.get("tags", []))
        doc_tokens = set(re.findall(r"\w+", f"{question} {answer} {tags}".lower()))
        overlap = len(query_tokens & doc_tokens) / max(1, len(query_tokens))
        question_boost = 0.20 if any(tok in question.lower() for tok in query_tokens) else 0.0
        tag_boost = 0.10 if any(tok in tags.lower() for tok in query_tokens) else 0.0
        sim_score = float(meta.get("sim_score", 0.0))
        return overlap + question_boost + tag_boost + (0.35 * sim_score)

    # sorted() is stable, so ties keep their retrieval order.
    ranked = sorted(docs, key=_score, reverse=True)
    return ranked[:limit]
 def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
+    if max_turns is None:
+        max_turns = cfg.max_chat_history_turns
     items = chat_history[-max_turns:]
     if not items:
         return "[EMPTY]"
     return "\n".join([f"{m['role'].upper()}: {m['content']}" for m in items]).strip()
 def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
+    if max_chars is None:
+        max_chars = cfg.max_context_chars
     blocks = []
     total = 0
     for i, d in enumerate(docs, 1):
         q = d.metadata.get("question", "")
         a = d.metadata.get("answer", "")
         tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
+        sim = d.metadata.get("sim_score", None)
         block = f"""
 ==============================
 EVIDENCE_ID: {i}
 SOURCE_ID: {d.metadata.get('id')}
 SOURCE_QUESTION: {q}
 SOURCE_TAGS: {tags}
+SIMILARITY: {sim if sim is not None else 'N/A'}
 EVIDENCE_TEXT:
 {a}
 ==============================
 """.strip()
         if total + len(block) > max_chars:
             break
         blocks.append(block)
         total += len(block) + 2
+    return "\n\n".join(blocks).strip()
def compute_confidence(result: Dict) -> float:
    """Turn retrieval score and validator verdict into a 0..1 confidence.

    A ``SUPPORTED`` verdict keeps the best retrieval score as is, a
    ``PARTLY_UNSUPPORTED`` verdict scales it by 0.70, and anything else
    scales it by 0.40. The result is clamped to ``[0.0, 1.0]``.

    Args:
        result: Pipeline result; reads ``best_score`` (default -1.0) and
            ``validation_status`` (default empty string).

    Returns:
        The clamped confidence value.
    """
    score = result.get("best_score", -1.0)
    verdict = result.get("validation_status", "")
    if verdict.startswith("SUPPORTED"):
        scaled = score
    else:
        penalty = 0.70 if verdict.startswith("PARTLY_UNSUPPORTED") else 0.40
        scaled = score * penalty
    return max(0.0, min(1.0, scaled))
def strong_retrieval(best_score: float, docs: List[Document]) -> bool:
    """Report whether retrieval is strong enough to skip validation.

    True only when ``best_score`` reaches ``cfg.strong_retrieval_threshold``
    and at least ``cfg.strong_retrieval_min_docs`` documents were kept.
    """
    score_ok = best_score >= cfg.strong_retrieval_threshold
    if not score_ok:
        return False
    return len(docs) >= cfg.strong_retrieval_min_docs
def stream_text(text: str, step: int = 110):
    """Yield progressively longer prefixes of ``text`` for typewriter output.

    Each yielded value extends the previous one by up to ``step`` characters;
    the final value is the full text. Empty text yields nothing.
    """
    for start in range(0, len(text), step):
        yield text[:start + step]
+# -------------------------------
 # EMBEDDINGS + VECTORSTORE
+# -------------------------------
 logger.info("Loading embeddings...")
+embeddings = HuggingFaceEmbeddings(model_name=cfg.embed_model_name)
 def build_vectorstore():
                     "question": q,
                     "answer": a,
                     "tags": infer_tags(q, a),
+                }
             )
         )
 logger.info("Vectorstore ready.")
+# -------------------------------
+# LOCAL MODEL + ECG ADAPTER
+# -------------------------------
 logger.info("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     cfg.base_model_path,
     use_fast=True,
+    token=cfg.hf_token if cfg.hf_token else None
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 base_model.eval()
+logger.info("Loading ECG reasoning adapter...")
 reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
 reason_model.eval()
 def get_primary_model_device(model) -> torch.device:
     try:
         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 @torch.inference_mode()
 def run_local_reasoner(user_query: str, context: str) -> str:
     try:
         messages = [
             {"role": "system", "content": LOCAL_REASONING_SYSTEM},
+            {
+                "role": "user",
+                "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context if context.strip() else '[EMPTY]'}"
+            },
         ]
         prompt = tokenizer.apply_chat_template(
         gen_ids = out[0, inputs["input_ids"].shape[1]:]
         text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
+        text = strip_bad_sections(text)
+        return text if text else "INSUFFICIENT_EVIDENCE"
     except Exception as e:
         logger.error(f"Local reasoner error: {e}")
         traceback.print_exc()
         return "INSUFFICIENT_EVIDENCE"
+# -------------------------------
+# REMOTE LLM (DEEPSEEK)
+# -------------------------------
+deepseek_llm = ChatOpenAI(
+    model=cfg.deepseek_model,
+    api_key=cfg.deepseek_api_key,
+    base_url=cfg.deepseek_base_url,
+    temperature=cfg.deepseek_temperature,
+    max_tokens=cfg.deepseek_max_tokens,
+)
+_query_expansion_cache: Dict[str, str] = {}
def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
    """Send a system/user prompt pair to the remote LLM and return cleaned text.

    Args:
        system_prompt: Instruction text for the system role.
        user_prompt: Content for the user role.
        fallback: Value returned when the call fails or yields blank text.

    Returns:
        The response passed through ``strip_bad_sections``, or ``fallback``.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        resp = deepseek_llm.invoke(messages)
        raw = resp.content if hasattr(resp, "content") else str(resp)
        cleaned = strip_bad_sections(raw)
        if cleaned.strip():
            return cleaned
        return fallback
    except Exception as e:
        # Best-effort boundary: any failure degrades to the fallback text.
        logger.error(f"DeepSeek error: {e}")
        traceback.print_exc()
        return fallback
# Upper bound on cached expansions so a long-running process cannot grow the
# cache without limit.
_QUERY_EXPANSION_CACHE_MAX = 512


def run_query_expansion(user_query: str) -> str:
    """Expand a query with medical paraphrases for a second retrieval pass.

    Returns the query unchanged when expansion is disabled. Successful
    expansions are memoized per exact query string when
    ``cfg.use_query_cache`` is set.

    Args:
        user_query: The user's original question.

    Returns:
        The expanded query, or the original query if expansion failed.
    """
    if not cfg.enable_query_expansion:
        return user_query
    if cfg.use_query_cache and user_query in _query_expansion_cache:
        logger.info(f"Using cached expansion for: {user_query[:80]}")
        return _query_expansion_cache[user_query]
    prompt = f"""
USER_QUERY:
{user_query}
Expand this for retrieval with close medical phrasing, synonyms, and alternate wording.
Do not answer the question.
""".strip()
    expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
    expanded = expanded.strip() if expanded else user_query
    # llm_text falls back to the raw query on any error; caching that would
    # turn one transient failure into permanently disabled expansion for this
    # query, so only results that differ from the query are stored.
    if cfg.use_query_cache and expanded != user_query.strip():
        if len(_query_expansion_cache) >= _QUERY_EXPANSION_CACHE_MAX:
            # dicts preserve insertion order: evict the oldest entry.
            _query_expansion_cache.pop(next(iter(_query_expansion_cache)))
        _query_expansion_cache[user_query] = expanded
    return expanded
def run_deepseek_summary(
    user_query: str,
    context: str,
    reasoning_draft: str,
    chat_history: List[Dict[str, str]],
) -> str:
    """Ask the remote LLM for the final grounded summary answer.

    The prompt contains the rendered chat history, the user question, the
    retrieved evidence and the local reasoning draft; blank evidence or
    draft is replaced by ``[EMPTY]``.

    Returns:
        The model's answer, or a fixed apology string if the call fails.
    """
    history_block = history_to_text(chat_history)
    evidence_block = context if context.strip() else '[EMPTY]'
    draft_block = reasoning_draft if reasoning_draft.strip() else '[EMPTY]'
    prompt = "\n".join([
        "CHAT_HISTORY:",
        f"{history_block}",
        "USER_QUESTION:",
        f"{user_query}",
        "RETRIEVED_EVIDENCE:",
        f"{evidence_block}",
        "LOCAL_REASONING_DRAFT:",
        f"{draft_block}",
        "Write a grounded final summary answer using only the evidence and reasoning draft.",
    ]).strip()
    return llm_text(
        DEEPSEEK_SUMMARY_SYSTEM,
        prompt,
        fallback="I could not generate a grounded summary from the retrieved evidence."
    )
def run_validator(context: str, answer: str) -> str:
    """Have the remote LLM judge whether ``answer`` is backed by ``context``.

    Returns:
        A verdict line from the validator prompt, a fixed "validator
        disabled" verdict when ``cfg.enable_validator`` is off, or a
        ``PARTLY_UNSUPPORTED`` fallback when the LLM call fails.
    """
    if cfg.enable_validator:
        evidence_block = context if context.strip() else '[EMPTY]'
        answer_block = answer if answer.strip() else '[EMPTY]'
        prompt = "\n".join([
            "EVIDENCE:",
            f"{evidence_block}",
            "ANSWER:",
            f"{answer_block}",
        ]).strip()
        return llm_text(VALIDATOR_SYSTEM, prompt, fallback="PARTLY_UNSUPPORTED: validator unavailable")
    return "SUPPORTED (validator disabled)"
+# -------------------------------
 # WARMUP
+# -------------------------------
 def warmup_models():
     logger.info("Warming up local reasoner...")
     try:
 EVIDENCE_ID: 1
 SOURCE_QUESTION: What are ECG findings in hyperkalemia?
 SOURCE_TAGS: ecg
 EVIDENCE_TEXT:
 Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
 ==============================
         logger.warning(f"Warmup failed: {e}")
+warmup_models()
+# -------------------------------
 # STATE
+# -------------------------------
+class ChatState(TypedDict, total=False):
     user_query: str
     expanded_query: str
+    chat_history: List[Dict[str, str]]
     retrieved_docs: List[Document]
     best_score: float
+    used_context: bool
     context: str
+    retrieval_attempts: int
+    retrieval_mode: str
+    reasoning_draft: str
     final_answer: str
+    validation_status: str
+# -------------------------------
 # RETRIEVAL
+# -------------------------------
def retrieve_docs_once(query_for_search: str, original_query: str):
    """Run one vector search, filter weak hits, and rerank the rest.

    Args:
        query_for_search: Text sent to the vector store (may be expanded).
        original_query: The user's query, used for lexical overlap and
            reranking.

    Returns:
        ``(docs, best_score)``: the reranked documents that pass both the
        lexical-overlap and similarity thresholds, and the highest
        similarity among *all* raw hits (filtered or not). On a search
        error this is ``([], -1.0)``.
    """
    try:
        hits = vectorstore.similarity_search_with_score(
            query_for_search,
            k=cfg.similarity_k,
        )
    except Exception as e:
        logger.error(f"Retriever error: {e}")
        traceback.print_exc()
        return [], -1.0

    kept = []
    top_similarity = -1.0
    for hit, raw_score in hits:
        similarity = score_to_similarity(raw_score)
        top_similarity = max(top_similarity, similarity)
        source_q = hit.metadata.get("question", "")
        source_a = hit.metadata.get("answer", "")
        overlap = lexical_overlap(original_query, f"{source_q} {source_a}")
        passes = overlap >= cfg.min_lexical_overlap and similarity >= cfg.min_faiss_similarity
        if not passes:
            continue
        # Copy so the scores are not written into the store's own metadata.
        annotated = Document(page_content=hit.page_content, metadata=dict(hit.metadata))
        annotated.metadata["sim_score"] = similarity
        annotated.metadata["lexical_overlap"] = overlap
        kept.append(annotated)

    return rerank_docs(original_query, kept, top_n=cfg.top_k_final), top_similarity
+# -------------------------------
+# LANGGRAPH NODES
+# -------------------------------
def retrieve_node(state: ChatState) -> ChatState:
    """Graph node: retrieve evidence for the current (possibly expanded) query.

    Searches with ``expanded_query`` when present, otherwise ``user_query``,
    and increments the attempt counter.

    Returns:
        State update with the documents, best score, whether any context
        was found, the formatted context string, attempt count and mode.
    """
    expanded = state.get("expanded_query")
    docs, best_score = retrieve_docs_once(
        query_for_search=expanded or state["user_query"],
        original_query=state["user_query"],
    )
    update = {
        "retrieved_docs": [],
        "best_score": best_score,
        "used_context": False,
        "context": "",
        "retrieval_attempts": int(state.get("retrieval_attempts", 0)) + 1,
        "retrieval_mode": "expanded" if expanded else "original",
    }
    if docs:
        update["retrieved_docs"] = docs
        update["used_context"] = True
        update["context"] = build_context_string(docs, max_chars=cfg.max_context_chars)
    return update
def should_retry_retrieval(state: ChatState) -> str:
    """Graph router: choose between query expansion and local reasoning.

    Returns ``"expand_query"`` only when retrieval did not produce usable
    context, query expansion is enabled, and fewer than two retrieval
    attempts have been made; otherwise ``"local_reasoning"``.
    """
    has_context = state.get("used_context", False)
    score = state.get("best_score", -1.0)
    tries = int(state.get("retrieval_attempts", 0))

    retrieval_ok = has_context and score >= cfg.min_faiss_similarity
    if retrieval_ok or not cfg.enable_query_expansion or tries >= 2:
        return "local_reasoning"
    return "expand_query"
def expand_query_node(state: ChatState) -> ChatState:
    """Graph node: store an expanded form of the user query in the state.

    Falls back to the original query when the expansion is blank.
    """
    original = state["user_query"]
    candidate = run_query_expansion(original)
    return {"expanded_query": candidate if candidate.strip() else original}
def local_reasoning_node(state: ChatState) -> ChatState:
    """Graph node: produce the local model's reasoning draft.

    Without any context the draft is the ``INSUFFICIENT_EVIDENCE`` sentinel
    and the local model is not invoked.
    """
    evidence = state.get("context", "").strip()
    draft = run_local_reasoner(state["user_query"], evidence) if evidence else "INSUFFICIENT_EVIDENCE"
    return {"reasoning_draft": draft}
def generate_node(state: ChatState) -> ChatState:
    """Graph node: generate the final answer from evidence and draft.

    Returns a fixed "no evidence" message without calling the remote model
    when the context is empty.
    """
    evidence = state.get("context", "").strip()
    if not evidence:
        return {"final_answer": "I could not find sufficiently relevant evidence in the RAG database for this question."}
    return {
        "final_answer": run_deepseek_summary(
            user_query=state["user_query"],
            context=evidence,
            reasoning_draft=state.get("reasoning_draft", "INSUFFICIENT_EVIDENCE"),
            chat_history=state.get("chat_history", []),
        )
    }
def validate_node(state: ChatState) -> ChatState:
    """Graph node: check the answer against the evidence and add caveats.

    Validation is skipped when context or answer is missing, and when
    ``strong_retrieval`` holds. For ``PARTLY_UNSUPPORTED`` and
    ``INSUFFICIENT_EVIDENCE`` verdicts an "Evidence limits" note is appended
    to the answer.

    Returns:
        State update with ``validation_status`` and, when a caveat applies,
        the amended ``final_answer``.
    """
    evidence = state.get("context", "").strip()
    answer = state.get("final_answer", "").strip()
    if not (evidence and answer):
        return {"validation_status": "INSUFFICIENT_EVIDENCE: missing context or answer"}

    if strong_retrieval(state.get("best_score", -1.0), state.get("retrieved_docs", [])):
        return {"validation_status": "SUPPORTED (validator skipped due to strong retrieval)"}

    verdict = run_validator(evidence, answer)
    update = {"validation_status": verdict}
    if verdict.startswith("SUPPORTED"):
        return update

    caveats = (
        ("PARTLY_UNSUPPORTED", "\n\nEvidence limits: some parts may not be fully supported by the retrieved evidence."),
        ("INSUFFICIENT_EVIDENCE", "\n\nEvidence limits: the retrieved evidence was weak or only partially relevant."),
    )
    for prefix, note in caveats:
        if verdict.startswith(prefix):
            update["final_answer"] = answer + note
            break
    return update
def finalize_node(state: ChatState) -> ChatState:
    """Graph node: clean the final answer and guarantee it is non-empty."""
    cleaned = strip_bad_sections(state.get("final_answer", ""))
    return {"final_answer": cleaned or "I could not generate an answer."}
+# -------------------------------
+# GRAPH
+# -------------------------------
# Pipeline: retrieve -> (expand_query -> retrieve)? -> local_reasoning
#           -> generate -> validate -> finalize
builder = StateGraph(ChatState)

for _node_name, _node_fn in (
    ("retrieve", retrieve_node),
    ("expand_query", expand_query_node),
    ("local_reasoning", local_reasoning_node),
    ("generate", generate_node),
    ("validate", validate_node),
    ("finalize", finalize_node),
):
    builder.add_node(_node_name, _node_fn)

builder.add_edge(START, "retrieve")
# After retrieval, either retry with an expanded query or move on.
builder.add_conditional_edges(
    "retrieve",
    should_retry_retrieval,
    {
        "expand_query": "expand_query",
        "local_reasoning": "local_reasoning",
    }
)
for _src, _dst in (
    ("expand_query", "retrieve"),
    ("local_reasoning", "generate"),
    ("generate", "validate"),
    ("validate", "finalize"),
    ("finalize", END),
):
    builder.add_edge(_src, _dst)

graph = builder.compile()
logger.info("LangGraph compiled.")
+# -------------------------------
+# FORMATTING HELPERS
+# -------------------------------
def format_sources_minimal(result: Optional[Dict]) -> str:
    """Render the retrieved evidence as a short Markdown report.

    Args:
        result: Pipeline result with ``retrieved_docs`` and ``best_score``,
            or ``None`` before the first query.

    Returns:
        Markdown with the best score and, per document, its source question,
        similarity and a preview of at most 210 characters of the answer.
    """
    if not result:
        return "## Retrieved Sources\n\nNo sources yet."
    docs = result.get("retrieved_docs", [])
    best_score = result.get("best_score", -1.0)
    if not docs:
        return (
            "## Retrieved Sources\n\n"
            "No sufficiently relevant evidence retrieved.\n\n"
            f"**Best score:** `{best_score:.3f}`"
        )
    lines = [
        "## Retrieved Sources",
        f"**Best score:** `{best_score:.3f}`",
        "",
    ]
    for i, d in enumerate(docs, 1):
        question = d.metadata.get("question", "")
        # Tolerate a missing/None answer instead of failing on the slice.
        answer = str(d.metadata.get("answer") or "")
        similarity = d.metadata.get("sim_score")
        # Same 3-decimal format as the best score, rather than a raw float repr.
        if isinstance(similarity, (int, float)):
            similarity_text = f"{similarity:.3f}"
        else:
            similarity_text = "N/A"
        preview = answer[:210].strip()
        if len(answer) > 210:
            preview += "..."
        lines.extend([
            f"### Evidence {i}",
            f"- **Question:** {question}",
            f"- **Similarity:** `{similarity_text}`",
            f"- **Preview:** {preview}",
            "",
        ])
    return "\n".join(lines)
def format_debug_text(result: Optional[Dict]) -> str:
    """Render pipeline diagnostics as plain text for the debug panel.

    Args:
        result: Pipeline result dict, or ``None``/empty before the first run.

    Returns:
        Score, retrieval and validation fields followed by the context and
        the local reasoning draft, with surrounding whitespace stripped.
    """
    if not result:
        return "No debug result yet."
    report_lines = [
        f"BEST SCORE: {result.get('best_score', -1.0)}",
        f"USED CONTEXT: {result.get('used_context', False)}",
        f"RETRIEVAL ATTEMPTS: {result.get('retrieval_attempts', 0)}",
        f"RETRIEVAL MODE: {result.get('retrieval_mode', 'N/A')}",
        f"VALIDATION STATUS: {result.get('validation_status', 'N/A')}",
        "----- CONTEXT -----",
        f"{result.get('context', '')}",
        "----- LOCAL REASONING DRAFT -----",
        f"{result.get('reasoning_draft', '')}",
    ]
    return "\n".join(report_lines).strip()
+# -------------------------------
 # UI HELPERS
+# -------------------------------
 CUSTOM_CSS = """
+:root {
+    --bg-main: #07111f;
+    --bg-soft: #0b1728;
+    --card: rgba(10, 19, 35, 0.86);
+    --card-2: rgba(14, 25, 43, 0.94);
+    --border: rgba(148, 163, 184, 0.16);
+    --text: #e5eefb;
+    --muted: #94a3b8;
+    --primary: #7c3aed;
+    --primary-2: #2563eb;
+    --success: #10b981;
+}
 html, body, .gradio-container {
     margin: 0 !important;
     padding: 0 !important;
+    min-height: 100%;
+    background:
+        radial-gradient(circle at top left, rgba(124,58,237,0.22), transparent 28%),
+        radial-gradient(circle at top right, rgba(37,99,235,0.18), transparent 24%),
+        linear-gradient(180deg, #050b16 0%, #091321 100%);
+    color: var(--text);
 }
 .gradio-container {
+    max-width: 100% !important;
+    padding: 12px !important;
 }
+footer {
+    visibility: hidden;
+}
+.top-card {
+    border: 1px solid var(--border);
+    background: linear-gradient(135deg, rgba(11,23,40,0.95), rgba(18,31,56,0.92));
+    border-radius: 22px;
     padding: 16px;
     margin-bottom: 12px;
+    box-shadow: 0 14px 40px rgba(0,0,0,0.20);
 }
+.hero-title {
+    font-size: 1.6rem;
     font-weight: 800;
+    color: #f8fbff;
     margin-bottom: 6px;
+    line-height: 1.15;
 }
+.hero-subtitle {
     color: #cbd5e1;
+    font-size: 0.95rem;
+    line-height: 1.5;
+}
+.badges {
+    display: flex;
+    gap: 8px;
+    flex-wrap: wrap;
+    margin-top: 12px;
+}
+.badge {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    padding: 6px 10px;
+    border-radius: 999px;
+    font-size: 11px;
+    color: #e6eefc;
+    border: 1px solid rgba(255,255,255,0.12);
+    background: rgba(255,255,255,0.06);
+}
+.panel-wrap {
+    border: 1px solid var(--border);
+    background: linear-gradient(180deg, rgba(10,19,35,0.96), rgba(7,14,26,0.94));
+    border-radius: 20px;
+    padding: 12px;
+    box-shadow: 0 16px 45px rgba(0,0,0,0.22);
 }
 #chatbot {
+    height: min(62vh, 640px) !important;
+    min-height: 360px !important;
     border-radius: 18px !important;
+    border: 1px solid var(--border) !important;
+    overflow: hidden !important;
+    box-shadow: 0 14px 40px rgba(0,0,0,0.26) !important;
 }
+.status-card {
     padding: 12px 14px;
+    border-radius: 16px;
+    background: linear-gradient(135deg, #0f172a 0%, #172554 100%);
+    color: #f9fafb;
+    font-size: 14px;
+    border: 1px solid rgba(255,255,255,0.12);
+    box-shadow: 0 10px 30px rgba(0,0,0,0.2);
 }
+.muted {
+    color: #a5b4fc;
+    font-size: 12px;
+}
+.blink-dots {
+    font-size: 22px;
     font-weight: 800;
+    letter-spacing: 4px;
     animation: blinkDots 1s steps(1, end) infinite;
+    display: inline-block;
+    padding: 2px 0;
 }
 @keyframes blinkDots {
     0% { opacity: 1; }
+    50% { opacity: 0.15; }
     100% { opacity: 1; }
 }
 textarea, .gr-textbox textarea {
+    border-radius: 16px !important;
+    font-size: 15px !important;
+}
+.gr-textbox label, .gr-markdown, .gr-button {
+    font-size: 14px !important;
 }
 button {
     border-radius: 14px !important;
     min-height: 44px !important;
     font-weight: 600 !important;
 }
+.mobile-stack {
+    display: flex;
+    flex-direction: column;
+    gap: 12px;
+}
+.mobile-scroll {
+    max-height: 34vh;
+    overflow-y: auto;
+}
+.command-note {
+    color: #cbd5e1;
+    font-size: 0.88rem;
+    line-height: 1.45;
+}
+@media (max-width: 1024px) {
+    .gradio-container {
+        padding: 10px !important;
+    }
+    .hero-title {
+        font-size: 1.45rem;
+    }
+    .hero-subtitle {
+        font-size: 0.92rem;
+    }
+    #chatbot {
+        height: 56vh !important;
+    }
+}
+@media (max-width: 768px) {
+    .gradio-container {
+        padding: 8px !important;
+    }
+    .top-card {
+        padding: 14px;
+        border-radius: 18px;
+    }
+    .hero-title {
+        font-size: 1.28rem;
+    }
+    .hero-subtitle {
+        font-size: 0.88rem;
+        line-height: 1.45;
+    }
+    .badge {
+        font-size: 10px;
+        padding: 5px 8px;
+    }
+    .panel-wrap {
+        padding: 10px;
+        border-radius: 16px;
+    }
+    #chatbot {
+        height: 52vh !important;
+        min-height: 320px !important;
+        border-radius: 16px !important;
+    }
+    button {
+        width: 100% !important;
+    }
+    .mobile-scroll {
+        max-height: 240px;
+    }
+}
+@media (max-width: 480px) {
+    .hero-title {
+        font-size: 1.15rem;
+    }
+    .hero-subtitle {
+        font-size: 0.83rem;
+    }
+    #chatbot {
+        height: 50vh !important;
+        min-height: 300px !important;
+    }
+    textarea, .gr-textbox textarea {
+        font-size: 14px !important;
+    }
+}
 """
def hero_html() -> str:
    """Return the static HTML markup for the header card at the top of the page.

    The card holds the app title, a two-line description and four feature
    badges; it relies on the ``top-card``, ``hero-title``, ``hero-subtitle``,
    ``badges`` and ``badge`` CSS classes.
    """
    header_markup = """
    <div class="top-card">
        <div class="hero-title">🫀 Mr Cardio</div>
        <div class="hero-subtitle">
            ECG-focused clinical chatbot with RAG retrieval, local ECG reasoning,
            and grounded evidence summaries. Mobile-friendly layout included.
        </div>
        <div class="badges">
            <div class="badge">ECG Reasoning</div>
            <div class="badge">FAISS Retrieval</div>
            <div class="badge">LoRA Adapter</div>
            <div class="badge">Validated Output</div>
        </div>
    </div>
    """
    return header_markup
 def thinking_html(stage: str) -> str:
     return f"""
+    <div class="status-card">
+        <div style="display:flex;align-items:center;gap:12px;">
+            <div style="font-size:19px;">⏳</div>
+            <div>
+                <div style="font-weight:700;">{stage}</div>
+                <div class="muted">Retrieval → reasoning → grounded answer</div>
+                <div class="blink-dots">...</div>
+            </div>
+        </div>
     </div>
     """
def initialize_session():
    """Build a fresh session-state dict: empty chat history and no last result."""
    fresh_state = dict(chat_history=[], last_result=None)
    return fresh_state
def add_assistant_placeholder(history, text="..."):
    """Append a temporary assistant message titled "Thinking" to the history.

    Args:
        history: Chat message list; a falsy value (``None`` or empty) is
            replaced by a new list, otherwise the list is extended in place.
        text: Content of the placeholder message.

    Returns:
        The message list with the placeholder appended.
    """
    messages = history if history else []
    placeholder = {
        "role": "assistant",
        "content": text,
        "metadata": {"title": "Thinking"},
    }
    messages.append(placeholder)
    return messages
def update_last_assistant_message(history, text, title=None):
    """Set the trailing assistant message, appending one if there is none.

    Args:
        history: Chat message list; a falsy value is replaced by a new list.
        text: New message content.
        title: Optional title stored under ``metadata``; when falsy the
            resulting message carries no ``metadata`` key at all.

    Returns:
        The message list. If its last entry already had the ``assistant``
        role that entry is replaced wholesale; otherwise the new message is
        appended.
    """
    messages = history if history else []

    replacement = {"role": "assistant", "content": text}
    if title:
        replacement["metadata"] = {"title": title}

    last_is_assistant = bool(messages) and messages[-1]["role"] == "assistant"
    if last_is_assistant:
        messages[-1] = replacement
    else:
        messages.append(replacement)
    return messages
def user_submit(user_message, chat_ui_history):
    """Record the user's message in the chat history and clear the input box.

    Args:
        user_message: Raw textbox value; ``None`` is treated as empty and
            surrounding whitespace is stripped.
        chat_ui_history: Chat message list; a falsy value is replaced by a
            new list.

    Returns:
        A ``("", history)`` pair. The history gains a ``user`` message only
        when the stripped text is non-empty.
    """
    messages = chat_ui_history if chat_ui_history else []
    cleaned = (user_message or "").strip()
    if cleaned:
        messages.append({"role": "user", "content": cleaned})
    return "", messages
+# -------------------------------
+# CORE CHAT
+# -------------------------------
def run_chat_turn(user_message: str, memory_state: Dict) -> Dict:
    """Run one question through the graph and fold the outcome into memory.

    Args:
        user_message: The user's question for this turn.
        memory_state: Session dict with ``chat_history`` and ``last_result``
            keys; ``None`` starts a fresh one. It is updated in place.

    Returns:
        A dict with ``answer`` (answer text plus a metrics footer),
        ``memory_state`` (the updated session dict), ``sources_markdown``
        and ``debug_text``.
    """
    if memory_state is None:
        memory_state = {"chat_history": [], "last_result": None}

    state_in = {
        "user_query": user_message,
        "chat_history": memory_state["chat_history"],
        "retrieval_attempts": 0,
    }

    try:
        result = graph.invoke(state_in)
    except Exception as e:
        # UI boundary: log with traceback and degrade to an error-shaped result
        # so the chat turn still completes instead of propagating the failure.
        logger.exception("Graph invocation error: %s", e)
        result = {
            "final_answer": f"I hit a runtime error while processing the request: {e}",
            "best_score": -1.0,
            "used_context": False,
            "validation_status": "ERROR",
            "retrieved_docs": [],
            "context": "",
            "reasoning_draft": "",
            "retrieval_attempts": 0,
            "retrieval_mode": "error",
        }

    # A key that is present but None must not crash .strip() or the float
    # formatting below, so None is treated the same as a missing key.
    answer = (result.get("final_answer") or "").strip() or "I could not generate an answer."
    best_score = result.get("best_score")
    if best_score is None:
        best_score = -1.0
    validation_status = result.get("validation_status")
    if validation_status is None:
        validation_status = "N/A"

    confidence = compute_confidence(result)
    answer_with_footer = (
        f"{answer}\n\n---\n"
        f"📊 confidence={confidence:.2f} | best_score={best_score:.3f} | validation={validation_status}"
    )

    # Memory stores the bare answer (no footer) and keeps only the 12 most
    # recent messages.
    memory_state["chat_history"].append({"role": "user", "content": user_message})
    memory_state["chat_history"].append({"role": "assistant", "content": answer})
    memory_state["chat_history"] = memory_state["chat_history"][-12:]
    memory_state["last_result"] = result

    return {
        "answer": answer_with_footer,
        "memory_state": memory_state,
        "sources_markdown": format_sources_minimal(result),
        "debug_text": format_debug_text(result),
    }
def _last_result_panels(session_state):
    """Render (sources_markdown, debug_text) from the session's last result."""
    last_result = session_state.get("last_result")
    return format_sources_minimal(last_result), format_debug_text(last_result)


def bot_respond_stream(chat_ui_history, session_state):
    """Stream the assistant's reply for the most recent user message.

    This is a generator. Every yielded item is a 5-tuple of
    ``(chat history, session state, sources markdown, debug text, status HTML)``.

    The slash commands ``/sources``, ``/debug`` and ``/rebuild`` are handled
    here instead of being sent to the chat pipeline. For ordinary questions
    the stage banners are shown on timers taken from ``cfg``; the pipeline
    itself runs in the single ``run_chat_turn`` call.

    Args:
        chat_ui_history: Chat message list whose last entry is expected to be
            the user's new message.
        session_state: Session dict (see ``initialize_session``); ``None``
            starts a fresh one.
    """
    global vectorstore

    if session_state is None:
        session_state = initialize_session()

    if not chat_ui_history:
        yield (
            chat_ui_history,
            session_state,
            "## Retrieved Sources\n\nNo sources yet.",
            "No debug result yet.",
            "",
        )
        return

    last_message = chat_ui_history[-1]
    if last_message.get("role") != "user":
        # A blank submit leaves the history untouched, so its last entry is the
        # previous assistant reply. Answering it would feed the bot's own text
        # back in as a new question, so leave everything as it is.
        sources_md, debug_txt = _last_result_panels(session_state)
        yield (chat_ui_history, session_state, sources_md, debug_txt, "")
        return

    user_message = str(last_message["content"]).strip()

    if user_message == "/sources":
        sources_md, debug_txt = _last_result_panels(session_state)
        chat_ui_history.append({
            "role": "assistant",
            "content": sources_md,
            "metadata": {"title": "Sources"},
        })
        yield (chat_ui_history, session_state, sources_md, debug_txt, "")
        return

    if user_message == "/debug":
        sources_md, debug_txt = _last_result_panels(session_state)
        chat_ui_history.append({
            "role": "assistant",
            "content": debug_txt,
            "metadata": {"title": "Debug"},
        })
        yield (chat_ui_history, session_state, sources_md, debug_txt, "")
        return

    if user_message == "/rebuild":
        if not cfg.allow_rebuild_vectorstore:
            chat_ui_history.append({
                "role": "assistant",
                "content": "Vector store rebuild is disabled on this Space.",
                "metadata": {"title": "Restricted"},
            })
            sources_md, debug_txt = _last_result_panels(session_state)
            yield (chat_ui_history, session_state, sources_md, debug_txt, "")
            return

        chat_ui_history = add_assistant_placeholder(chat_ui_history)
        yield (
            chat_ui_history,
            session_state,
            "",
            "",
            thinking_html("Rebuilding vector store"),
        )
        time.sleep(cfg.blink_stage_1)

        chat_ui_history = update_last_assistant_message(
            chat_ui_history,
            "Rebuilding vector store and reloading embeddings...",
            title="Maintenance",
        )
        yield (
            chat_ui_history,
            session_state,
            "",
            "",
            thinking_html("Rebuilding vector store"),
        )

        try:
            build_vectorstore()
            vectorstore = load_vectorstore()
        except Exception as exc:
            # Report the failure in the chat; otherwise the maintenance
            # message and status banner would stay on screen forever.
            logger.exception("Vector store rebuild failed: %s", exc)
            chat_ui_history = update_last_assistant_message(
                chat_ui_history,
                f"❌ Vector store rebuild failed: {exc}",
                title="Error",
            )
        else:
            chat_ui_history = update_last_assistant_message(
                chat_ui_history,
                "✅ Vector store rebuilt and reloaded.",
                title="Done",
            )

        sources_md, debug_txt = _last_result_panels(session_state)
        yield (chat_ui_history, session_state, sources_md, debug_txt, "")
        return

    # Ordinary question: show the placeholder and the timed stage banners.
    chat_ui_history = add_assistant_placeholder(chat_ui_history, text="...")
    yield (
        chat_ui_history,
        session_state,
        "",
        "",
        thinking_html("Starting"),
    )
    time.sleep(cfg.blink_stage_1)

    yield (
        chat_ui_history,
        session_state,
        "",
        "",
        thinking_html("Retrieving evidence"),
    )
    time.sleep(cfg.blink_stage_2)

    yield (
        chat_ui_history,
        session_state,
        "",
        "",
        thinking_html("Running ECG adapter reasoning"),
    )
    time.sleep(cfg.blink_stage_3)

    out = run_chat_turn(user_message, session_state)

    yield (
        chat_ui_history,
        session_state,
        out["sources_markdown"],
        out["debug_text"],
        thinking_html("Generating grounded summary"),
    )
    time.sleep(cfg.blink_before_answer)

    if cfg.enable_typewriter_stream:
        # Reveal the answer progressively by overwriting the placeholder.
        for partial in stream_text(out["answer"], step=120):
            chat_ui_history = update_last_assistant_message(
                chat_ui_history,
                partial,
                title="Answer",
            )
            yield (
                chat_ui_history,
                session_state,
                out["sources_markdown"],
                out["debug_text"],
                "",
            )

    # Final frame: full answer, updated memory, status banner cleared.
    chat_ui_history = update_last_assistant_message(
        chat_ui_history,
        out["answer"],
        title="Answer",
    )
    yield (
        chat_ui_history,
        out["memory_state"],
        out["sources_markdown"],
        out["debug_text"],
        "",
    )
def clear_chat():
    """Return reset values for the chat, session, sources, debug and status outputs."""
    empty_history = []
    fresh_session = initialize_session()
    sources_placeholder = "## Retrieved Sources\n\nNo sources yet."
    debug_placeholder = "No debug result yet."
    return empty_history, fresh_session, sources_placeholder, debug_placeholder, ""
def rebuild_from_button(session_state, chatbot_history):
    """Handle the "Rebuild Store" button: rebuild and reload the vector store.

    Args:
        session_state: Session dict; ``None`` starts a fresh one, matching
            the guard used by the chat handlers.
        chatbot_history: Chat message list; a falsy value is replaced by a
            new list.

    Returns:
        A 5-tuple ``(chat history, session state, sources markdown,
        debug text, status HTML)``. One assistant notice is appended to the
        history: "Restricted" when rebuilding is disabled in ``cfg``,
        "Done" on success, or "Error" when the rebuild raised.
    """
    global vectorstore

    if session_state is None:
        session_state = initialize_session()
    chatbot_history = chatbot_history or []

    if not cfg.allow_rebuild_vectorstore:
        notice_text = "Vector store rebuild is disabled on this Space."
        notice_title = "Restricted"
    else:
        try:
            build_vectorstore()
            vectorstore = load_vectorstore()
        except Exception as exc:
            # UI boundary: log the traceback and tell the user in the chat
            # instead of letting the click handler fail without a message.
            logger.exception("Vector store rebuild failed: %s", exc)
            notice_text = f"❌ Vector store rebuild failed: {exc}"
            notice_title = "Error"
        else:
            notice_text = "✅ Vector store rebuilt and reloaded."
            notice_title = "Done"

    chatbot_history.append({
        "role": "assistant",
        "content": notice_text,
        "metadata": {"title": notice_title},
    })

    last_result = session_state.get("last_result")
    return (
        chatbot_history,
        session_state,
        format_sources_minimal(last_result),
        format_debug_text(last_result),
        "",
    )
+# -------------------------------
 # APP
+# -------------------------------
# Page layout: header card, chat panel with input and action buttons,
# collapsible sources panel and an optional debug panel.
with gr.Blocks(
    title="Medical CSV RAG Chatbot",
    css=CUSTOM_CSS,
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
        radius_size="lg",
        text_size="md",
    ),
) as demo:
    gr.HTML(hero_html())
    session_state = gr.State(initialize_session())

    with gr.Column(elem_classes=["mobile-stack"]):
        with gr.Group(elem_classes=["panel-wrap"]):
            chatbot = gr.Chatbot(
                label="Clinical Chat",
                height=640,
                elem_id="chatbot",
                type="messages",
                show_copy_button=True,
                bubble_full_width=False,
                avatar_images=(None, None),
            )
            user_box = gr.Textbox(
                label="Ask a medical question",
                placeholder="e.g. What are the ECG findings in hyperkalemia?",
                lines=2,
                autofocus=True,
            )
            status_html = gr.HTML("")
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear")
                rebuild_btn = gr.Button("Rebuild Store")
            gr.HTML(
                """
                <div class="command-note">
                    Commands: <code>/sources</code>, <code>/debug</code>, <code>/rebuild</code>
                </div>
                """
            )

        with gr.Accordion("Retrieved Sources", open=False):
            with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
                sources_panel = gr.Markdown("## Retrieved Sources\n\nNo sources yet.")

        if cfg.show_debug_panel:
            with gr.Accordion("Debug Panel", open=False):
                with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
                    debug_panel = gr.Textbox(
                        label="Debug",
                        value="No debug result yet.",
                        lines=18,
                        max_lines=28,
                        interactive=False,
                    )
        else:
            # Hidden stand-in so the handlers below can always list
            # debug_panel among their outputs.
            debug_panel = gr.Textbox(visible=False, value="")

    # Pressing Enter in the textbox: record the message, then stream the reply.
    # user_submit takes (user_message, chat_ui_history), so both components
    # must be passed as inputs, as the Send button does below.
    submit_event = user_box.submit(
        fn=user_submit,
        inputs=[user_box, chatbot],
        outputs=[user_box, chatbot],
        queue=True,
    )
    submit_event.then(
        fn=bot_respond_stream,
        inputs=[chatbot, session_state],
        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
        queue=True,
    )

    # Send button: same two-step chain as the textbox submit.
    send_click = send_btn.click(
        fn=user_submit,
        inputs=[user_box, chatbot],
        outputs=[user_box, chatbot],
        queue=True,
    )
    send_click.then(
        fn=bot_respond_stream,
        inputs=[chatbot, session_state],
        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
        queue=True,
    )

    clear_btn.click(
        fn=clear_chat,
        inputs=[],
        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
        queue=False,
    )

    rebuild_btn.click(
        fn=rebuild_from_button,
        inputs=[session_state, chatbot],
        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
        queue=True,
    )
 demo.queue(default_concurrency_limit=1)
         debug=cfg.launch_debug,
         server_name=cfg.server_name,
         server_port=cfg.server_port,
+    )