Spaces:

resberry
/

MrCardio

Sleeping

App Files Community

resberry committed on Apr 9

Commit

eb76838

verified ·

1 Parent(s): 3364d8d

Update app.py

Browse files

Files changed (1) hide show

app.py +546 -1419

app.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
 import re
-raw_omp = str(os.getenv("OMP_NUM_THREADS", "1")).strip()
-os.environ["OMP_NUM_THREADS"] = raw_omp if re.fullmatch(r"\d+", raw_omp) else "1"
 import time
-import traceback
 import logging
-from typing import List, Dict, TypedDict, Optional
 from dataclasses import dataclass, field
 import torch
@@ -21,50 +20,44 @@ from langchain_core.documents import Document
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
-from langgraph.graph import StateGraph, START, END
 # ============================================================
-# HUGGING FACE SPACES READY
-# Medical CSV RAG Chatbot + Normal Chat Mode
-# Modes:
-#   1) ECG RAG Mode -> retrieval -> local ECG reasoning -> grounded summary
-#   2) Normal Chat Mode -> standard chatbot response
-# Extra:
-#   3) Automatic ECG/Cardiology mode switching from user text
 # ============================================================
-# -------------------------------
 # LOGGING
-# -------------------------------
 logging.basicConfig(
     level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s"
 )
-logger = logging.getLogger(__name__)
-# -------------------------------
 # CONFIG
-# -------------------------------
 @dataclass
 class Config:
-    base_model_path: str = os.getenv(
-        "BASE_MODEL_PATH",
-        "meta-llama/Llama-3.1-8B-Instruct"
-    )
-    adapter_dir: str = os.getenv(
-        "ADAPTER_DIR",
-        "adapter_refined_v10"
-    )
-    data_csv: str = os.getenv(
-        "DATA_CSV",
-        "RAGmaterials/ECG_RAG_only_clean.csv"
-    )
-    rag_dir: str = os.getenv(
-        "RAG_DIR",
-        "RAGmaterials"
-    )
     vectorstore_dir: str = field(init=False)
     hf_token: str = os.getenv("HF_TOKEN", "")
@@ -72,56 +65,41 @@ class Config:
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
-    deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.1"))
-    deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "700"))
-    embed_model_name: str = os.getenv(
-        "EMBED_MODEL_NAME",
-        "sentence-transformers/all-MiniLM-L6-v2"
-    )
-    similarity_k: int = int(os.getenv("SIMILARITY_K", "12"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
-    max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5200"))
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
-    max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "180"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
-    min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.08"))
-    min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.20"))
-    strong_retrieval_threshold: float = float(os.getenv("STRONG_RETRIEVAL_THRESHOLD", "0.30"))
-    strong_retrieval_min_docs: int = int(os.getenv("STRONG_RETRIEVAL_MIN_DOCS", "3"))
-    use_query_cache: bool = os.getenv("USE_QUERY_CACHE", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
-    enable_validator: bool = os.getenv("ENABLE_VALIDATOR", "true").lower() == "true"
     enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
-    show_debug_panel: bool = os.getenv("SHOW_DEBUG_PANEL", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
-    use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
-    blink_stage_1: float = float(os.getenv("BLINK_STAGE_1", "0.40"))
-    blink_stage_2: float = float(os.getenv("BLINK_STAGE_2", "0.55"))
-    blink_stage_3: float = float(os.getenv("BLINK_STAGE_3", "0.50"))
-    blink_before_answer: float = float(os.getenv("BLINK_BEFORE_ANSWER", "0.25"))
     def __post_init__(self):
         self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
         os.makedirs(self.rag_dir, exist_ok=True)
         if not self.deepseek_api_key:
-            raise ValueError("Missing DEEPSEEK_API_KEY. Add it in Hugging Face Space Secrets.")
         if not self.hf_token:
-            raise ValueError(
-                "Missing HF_TOKEN. Add a valid Hugging Face token with access to the gated base model."
-            )
         for path, name in [
             (self.adapter_dir, "Adapter directory"),
@@ -135,20 +113,38 @@ cfg = Config()
 logger.info("Configuration loaded.")
-# -------------------------------
 # PROMPTS
-# -------------------------------
-LOCAL_REASONING_SYSTEM = """
-You are a strict medical reasoning assistant specialized for ECG and cardiology reasoning.
-You are NOT the final answer generator.
-You must analyze ONLY the supplied evidence and produce a short structured reasoning draft.
 Rules:
-1) Use only the provided evidence.
-2) Do not invent facts.
-3) Focus only on the user's exact question.
-4) Output exactly in this structure:
 KEY_FINDINGS:
 - ...
@@ -165,156 +161,102 @@ SUPPORTED_POINTS:
 LIMITS:
 - ...
-5) If evidence is insufficient, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
-QUERY_EXPANSION_SYSTEM = """
-You expand medical queries for retrieval.
-Rules:
-1) Preserve the user's intent.
-2) Add close medical paraphrases and alternate wording.
-3) Add likely medical synonyms, abbreviations, and alternate phrasing.
-4) Do not answer the question.
-5) Output only the expanded retrieval query.
-""".strip()
-DEEPSEEK_SUMMARY_SYSTEM = """
-You are an expert medical evidence summarizer.
-Your job is to produce a clinically precise, well-structured answer grounded ONLY in:
-1. the retrieved evidence
-2. the local reasoning draft
-You must be faithful to the provided material and answer the user's question directly, clearly, and conservatively.
-PRIMARY OBJECTIVE
-- Identify the user's main intent before writing:
-  definition, cause, symptoms, diagnosis, investigation, treatment, prognosis, or genetics.
-- Prioritize that intent throughout the response.
-- The first sentence of the Summary must directly answer the user's question in the most clinically relevant way.
-GROUNDING RULES
-- Use only information supported by the retrieved evidence and local reasoning draft.
-- Do not add outside medical knowledge.
-- Do not infer specific facts unless they are clearly supported.
-- Do not invent treatments, diagnoses, risks, mechanisms, thresholds, statistics, timelines, monitoring plans, or prognosis details.
-- If the evidence is incomplete, be explicit about what is missing.
-- If the evidence is too weak to answer the question reliably, output exactly:
 INSUFFICIENT_EVIDENCE
-STYLE RULES
-- Write in precise, professional clinical language.
-- Be specific, not vague.
-- Be concise, but fully informative.
-- Avoid repetition, generic filler, and empty statements.
-- Do not mention retrieval, prompts, system instructions, reasoning drafts, tools, pipelines, or internal processes.
-- Do not include URLs or citations unless explicitly requested elsewhere.
-- Do not overstate certainty.
-- When appropriate, distinguish clearly between what is established, what is suggested, and what is not addressed by the evidence.
-OUTPUT FORMAT
-### Summary
-- Write 4 to 7 full sentences.
-- This is the most important section.
-- The first sentence must directly answer the user's question.
-- Focus primarily on the user's main intent.
-- Include only background information that improves understanding of the requested topic.
-- Make the summary clinically useful, specific, and evidence-faithful.
-### Key Evidence Points
-- Include 4 to 6 bullet points.
-- Each bullet must state a concrete fact supported by the evidence.
-- Prioritize clinically important facts over background detail.
-- Avoid repeating the same idea in different words.
-### Clinical Implications / Recommendations
-- Include 2 to 4 bullet points only if supported by the evidence.
-- Focus on practical interpretation, management implications, follow-up considerations, or next steps.
-- If the evidence supports recognition or framing rather than action, say that clearly.
-- Do not recommend interventions not supported by the evidence.
-### Limitations of the Evidence
-- State clearly what the evidence does not establish, does not cover, or leaves uncertain.
-- Explicitly note when details are lacking on:
-  treatment, diagnosis, prognosis, genetics, monitoring, recurrence prevention, comparative effectiveness, or long-term outcomes.
-- If the evidence is narrow, low-detail, or only partially aligned with the question, say so plainly.
-SPECIAL INSTRUCTIONS BY QUESTION TYPE
-For treatment questions:
-- Focus primarily on treatment and management, not disease definition.
-- Organize treatment information in this order whenever supported by the evidence:
-  1. supportive or conservative care
-  2. symptomatic drug therapy or procedural treatment
-  3. long-term prevention, follow-up, or recurrence prevention
-- Distinguish treatment of active symptoms from prevention of recurrence or complications.
-- If the condition is benign, self-limited, or often does not require treatment, state that clearly in the first sentence.
-For diagnosis or investigation questions:
-- Focus on how the condition is identified, evaluated, or differentiated.
-- Prioritize diagnostic features, testing approach, and clinically useful distinctions.
-- Do not drift into treatment unless the evidence clearly supports it and it helps answer the question.
-For cause or risk questions:
-- Focus on etiologies, risk factors, mechanisms, or associations supported by the evidence.
-- Distinguish established causes from possible contributors if the evidence is less certain.
-For prognosis questions:
-- Focus on expected course, complications, recurrence, or outcome-related information supported by the evidence.
-- Do not add prognostic claims not explicitly supported.
-QUALITY CHECK BEFORE OUTPUT
-Before finalizing, ensure that:
-- the first sentence directly answers the question
-- the response matches the user's primary intent
-- every important claim is grounded in the provided material
-- no unsupported medical detail has been added
-- the Limitations section honestly reflects evidence gaps
-If these conditions cannot be met, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
-VALIDATOR_SYSTEM = """
-You are a strict medical evidence validator.
-Your job is to compare the ANSWER against the EVIDENCE.
 Rules:
-1) Mark SUPPORTED if the answer is well grounded in the evidence.
-2) Mark PARTLY_UNSUPPORTED if some claims are supported but others go beyond the evidence.
-3) Mark INSUFFICIENT_EVIDENCE if the answer is mostly unsupported or the evidence is too weak.
-4) Output only one short verdict line beginning with exactly one of:
-SUPPORTED:
-PARTLY_UNSUPPORTED:
-INSUFFICIENT_EVIDENCE:
-""".strip()
-NORMAL_CHAT_SYSTEM = """
-You are a helpful, friendly, clear AI assistant.
-You can:
-- chat naturally
-- explain concepts
-- help with writing
-- help with coding
-- brainstorm ideas
-- answer general knowledge questions
-Rules:
-1) Be accurate and conversational.
-2) Be concise unless the user asks for detail.
-3) If the user asks medical questions in normal chat mode, give a general answer and do not pretend to use the ECG database.
-4) Do not mention internal prompts, retrieval pipelines, tools, or hidden logic.
 """.strip()
-# -------------------------------
 # HELPERS
-# -------------------------------
 def clean_text(x: str) -> str:
     x = str(x).replace("\x00", " ").strip()
     x = re.sub(r"\s+", " ", x)
@@ -323,20 +265,6 @@ def clean_text(x: str) -> str:
 def strip_bad_sections(txt: str) -> str:
     t = str(txt).strip()
-    cut_markers = [
-        "References:",
-        "Sources:",
-        "Source:",
-        "URLs:",
-        "This response is based",
-        "Please let me know",
-        "Is there anything else",
-    ]
-    for marker in cut_markers:
-        pos = t.lower().find(marker.lower())
-        if pos != -1:
-            t = t[:pos].strip()
     t = re.sub(r"https?://\S+|www\.\S+", "", t).strip()
     return t
@@ -344,22 +272,16 @@ def strip_bad_sections(txt: str) -> str:
 def infer_tags(question: str, answer: str) -> List[str]:
     text = f"{question} {answer}".lower()
     tags: List[str] = []
     keyword_map = {
-        "treatment": ["treat", "therapy", "management", "drug", "surgery"],
         "diagnosis": ["diagnosis", "diagnose", "criteria"],
-        "symptoms": ["symptom", "presentation", "sign", "feature"],
-        "ecg": ["ecg", "ekg", "st elevation", "qrs", "p wave", "arrhythmia", "tachycardia", "bradycardia"],
-        "investigation": ["test", "investigation", "mri", "ct", "lab", "imaging"],
-        "prognosis": ["prognosis", "outcome", "survival", "risk"],
-        "genetics": ["gene", "genetic", "mutation", "variant", "chromosome", "inherited", "inheritance"],
-        "etiology": ["cause", "causes", "caused by", "associated with", "risk factor"],
     }
     for tag, words in keyword_map.items():
         if any(w in text for w in words):
             tags.append(tag)
     return tags
@@ -383,198 +305,92 @@ def lexical_overlap(query: str, text: str) -> float:
     return len(q_words & t_words) / max(1, len(q_words))
-def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
-    if top_n is None:
-        top_n = cfg.top_k_final
-    q_words = set(re.findall(r"\w+", query.lower()))
-    scored = []
-    for d in docs:
-        question = d.metadata.get("question", "")
-        answer = d.metadata.get("answer", "")
-        tags = " ".join(d.metadata.get("tags", []))
-        text = f"{question} {answer} {tags}".lower()
-        t_words = set(re.findall(r"\w+", text))
-        overlap = len(q_words & t_words) / max(1, len(q_words))
-        question_boost = 0.20 if any(w in question.lower() for w in q_words) else 0.0
-        tag_boost = 0.10 if any(w in tags.lower() for w in q_words) else 0.0
-        sim_score = float(d.metadata.get("sim_score", 0.0))
-        final_score = overlap + question_boost + tag_boost + (0.35 * sim_score)
-        scored.append((d, final_score))
-    scored.sort(key=lambda x: x[1], reverse=True)
-    return [d for d, _ in scored[:top_n]]
 def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
-    if max_turns is None:
-        max_turns = cfg.max_chat_history_turns
     items = chat_history[-max_turns:]
     if not items:
         return "[EMPTY]"
     return "\n".join([f"{m['role'].upper()}: {m['content']}" for m in items]).strip()
 def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
-    if max_chars is None:
-        max_chars = cfg.max_context_chars
     blocks = []
     total = 0
     for i, d in enumerate(docs, 1):
         q = d.metadata.get("question", "")
         a = d.metadata.get("answer", "")
         tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
-        sim = d.metadata.get("sim_score", None)
         block = f"""
 ==============================
 EVIDENCE_ID: {i}
 SOURCE_ID: {d.metadata.get('id')}
 SOURCE_QUESTION: {q}
 SOURCE_TAGS: {tags}
-SIMILARITY: {sim if sim is not None else 'N/A'}
 EVIDENCE_TEXT:
 {a}
 ==============================
 """.strip()
         if total + len(block) > max_chars:
             break
         blocks.append(block)
         total += len(block) + 2
     return "\n\n".join(blocks).strip()
-def compute_confidence(result: Dict) -> float:
-    best_score = result.get("best_score", -1.0)
-    validation = result.get("validation_status", "")
-    if validation.startswith("SUPPORTED"):
-        conf = best_score
-    elif validation.startswith("PARTLY_UNSUPPORTED"):
-        conf = best_score * 0.70
-    else:
-        conf = best_score * 0.40
-    return max(0.0, min(1.0, conf))
-def strong_retrieval(best_score: float, docs: List[Document]) -> bool:
-    return (
-        best_score >= cfg.strong_retrieval_threshold
-        and len(docs) >= cfg.strong_retrieval_min_docs
-    )
-def stream_text(text: str, step: int = 110):
     acc = ""
     for i in range(0, len(text), step):
         acc += text[i:i + step]
         yield acc
-# -------------------------------
-# AUTO MODE SWITCH DETECTION
-# -------------------------------
-ECG_MODE_PATTERNS = [
-    r"\becg\b",
-    r"\bekg\b",
-    r"\bcardiology\b",
-    r"\bcardio\b",
-    r"\barrhythmia\b",
-    r"\bheart rhythm\b",
-    r"\becg mode\b",
-    r"\bcardiology mode\b",
-    r"\bmedical mode\b",
-]
-ECG_SWITCH_PHRASES = [
-    r"switch to ecg",
-    r"switch into ecg",
-    r"switch to cardiology",
-    r"switch into cardiology",
-    r"switch to ecg and cardiology",
-    r"switch into ecg and cardiology",
-    r"ecg and cardiology",
-    r"medical ecg cardiology",
-    r"i want to ask ecg",
-    r"i want to ask ecr",
-    r"i want ecg",
-    r"ecg questions",
-    r"cardiology questions",
-    r"ecg only",
-    r"cardiology only",
-    r"activate ecg",
-    r"activate cardiology",
-]
-NORMAL_SWITCH_PHRASES = [
-    r"switch to normal",
-    r"normal chat",
-    r"back to normal",
-    r"exit ecg",
-    r"leave ecg mode",
-    r"turn off ecg mode",
-]
-def normalize_user_text(text: str) -> str:
-    text = str(text or "").lower().strip()
-    text = re.sub(r"\s+", " ", text)
-    return text
-def detect_mode_switch_request(user_message: str) -> Optional[str]:
-    text = normalize_user_text(user_message)
-    for pat in NORMAL_SWITCH_PHRASES:
-        if re.search(pat, text):
-            return "normal_chat"
-    strong_switch = any(re.search(pat, text) for pat in ECG_SWITCH_PHRASES)
-    ecg_present = any(re.search(pat, text) for pat in ECG_MODE_PATTERNS)
-    if strong_switch or (
-        ("switch" in text or "mode" in text or "questions" in text or "related" in text)
-        and ecg_present
-    ):
-        return "ecg_rag"
-    return None
-def mode_switch_message(mode_value: str) -> str:
-    if mode_value == "ecg_rag":
-        return (
-            "❤️ **ECG & Cardiology Mode activated**\n\n"
-            "UI updated successfully.\n"
-            "Ready for **medical, ECG, and cardiology** questions."
-        )
-    return (
-        "💬 **Normal Chat Mode activated**\n\n"
-        "UI updated successfully.\n"
-        "Ready for general conversation again."
-    )
-# -------------------------------
 # EMBEDDINGS + VECTORSTORE
-# -------------------------------
 logger.info("Loading embeddings...")
 embeddings = HuggingFaceEmbeddings(
     model_name=cfg.embed_model_name,
     model_kwargs={
         "device": "cuda" if torch.cuda.is_available() else "cpu",
-        "token": None,
     },
     encode_kwargs={"normalize_embeddings": True},
 )
@@ -605,7 +421,7 @@ def build_vectorstore():
                     "question": q,
                     "answer": a,
                     "tags": infer_tags(q, a),
-                }
             )
         )
@@ -630,16 +446,15 @@ vectorstore = load_vectorstore()
 logger.info("Vectorstore ready.")
-# -------------------------------
-# LOCAL MODEL + ECG ADAPTER
-# -------------------------------
 logger.info("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     cfg.base_model_path,
     use_fast=True,
-    token=cfg.hf_token if cfg.hf_token else None
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
@@ -680,10 +495,19 @@ if base_model is None:
 base_model.eval()
-logger.info("Loading ECG reasoning adapter...")
 reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
 reason_model.eval()
 def get_primary_model_device(model) -> torch.device:
     try:
@@ -692,15 +516,50 @@ def get_primary_model_device(model) -> torch.device:
         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 @torch.inference_mode()
 def run_local_reasoner(user_query: str, context: str) -> str:
     try:
         messages = [
             {"role": "system", "content": LOCAL_REASONING_SYSTEM},
-            {
-                "role": "user",
-                "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context if context.strip() else '[EMPTY]'}"
-            },
         ]
         prompt = tokenizer.apply_chat_template(
@@ -732,76 +591,31 @@ def run_local_reasoner(user_query: str, context: str) -> str:
         gen_ids = out[0, inputs["input_ids"].shape[1]:]
         text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
-        text = strip_bad_sections(text)
-        return text if text else "INSUFFICIENT_EVIDENCE"
     except Exception as e:
         logger.error(f"Local reasoner error: {e}")
         traceback.print_exc()
         return "INSUFFICIENT_EVIDENCE"
-# -------------------------------
-# REMOTE LLM (DEEPSEEK)
-# -------------------------------
-deepseek_llm = ChatOpenAI(
-    model=cfg.deepseek_model,
-    api_key=cfg.deepseek_api_key,
-    base_url=cfg.deepseek_base_url,
-    temperature=cfg.deepseek_temperature,
-    max_tokens=cfg.deepseek_max_tokens,
-)
-_query_expansion_cache: Dict[str, str] = {}
-def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
-    try:
-        resp = deepseek_llm.invoke([
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ])
-        text = resp.content if hasattr(resp, "content") else str(resp)
-        text = strip_bad_sections(text)
-        return text if text.strip() else fallback
-    except Exception as e:
-        logger.error(f"DeepSeek error: {e}")
-        traceback.print_exc()
-        return fallback
-def run_query_expansion(user_query: str) -> str:
-    if not cfg.enable_query_expansion:
-        return user_query
-    if cfg.use_query_cache and user_query in _query_expansion_cache:
-        logger.info(f"Using cached expansion for: {user_query[:80]}")
-        return _query_expansion_cache[user_query]
     prompt = f"""
-USER_QUERY:
-{user_query}
-Expand this for retrieval with close medical phrasing, synonyms, and alternate wording.
-Do not answer the question.
-""".strip()
-    expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
-    expanded = expanded.strip() if expanded else user_query
-    if cfg.use_query_cache:
-        _query_expansion_cache[user_query] = expanded
-    return expanded
-def run_deepseek_summary(
-    user_query: str,
-    context: str,
-    reasoning_draft: str,
-    chat_history: List[Dict[str, str]],
-) -> str:
     prompt = f"""
 CHAT_HISTORY:
 {history_to_text(chat_history)}
@@ -814,30 +628,28 @@ RETRIEVED_EVIDENCE:
 LOCAL_REASONING_DRAFT:
 {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
-Write a grounded final summary answer using only the evidence and reasoning draft.
 """.strip()
-    return llm_text(
-        DEEPSEEK_SUMMARY_SYSTEM,
-        prompt,
-        fallback="I could not generate a grounded summary from the retrieved evidence."
-    )
-def run_validator(context: str, answer: str) -> str:
-    if not cfg.enable_validator:
-        return "SUPPORTED (validator disabled)"
     prompt = f"""
-EVIDENCE:
 {context if context.strip() else '[EMPTY]'}
-ANSWER:
-{answer if answer.strip() else '[EMPTY]'}
-""".strip()
-    return llm_text(VALIDATOR_SYSTEM, prompt, fallback="PARTLY_UNSUPPORTED: validator unavailable")
 def run_normal_chat(user_query: str, chat_history: List[Dict[str, str]]) -> str:
@@ -847,20 +659,13 @@ CHAT_HISTORY:
 USER_MESSAGE:
 {user_query}
-Respond as a normal helpful chatbot.
 """.strip()
-    return llm_text(
-        NORMAL_CHAT_SYSTEM,
-        prompt,
-        fallback="Sorry, I could not generate a response."
-    )
-# -------------------------------
 # WARMUP
-# -------------------------------
 def warmup_models():
     logger.info("Warming up local reasoner...")
     try:
@@ -871,6 +676,7 @@ def warmup_models():
 EVIDENCE_ID: 1
 SOURCE_QUESTION: What are ECG findings in hyperkalemia?
 SOURCE_TAGS: ecg
 EVIDENCE_TEXT:
 Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
 ==============================
@@ -881,38 +687,58 @@ Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe
         logger.warning(f"Warmup failed: {e}")
-warmup_models()
-# -------------------------------
 # STATE
-# -------------------------------
-class ChatState(TypedDict, total=False):
     user_query: str
-    expanded_query: str
     chat_history: List[Dict[str, str]]
     retrieved_docs: List[Document]
     best_score: float
-    used_context: bool
     context: str
-    retrieval_attempts: int
-    retrieval_mode: str
-    reasoning_draft: str
     final_answer: str
-    validation_status: str
-# -------------------------------
 # RETRIEVAL
-# -------------------------------
-def retrieve_docs_once(query_for_search: str, original_query: str):
     try:
-        scored = vectorstore.similarity_search_with_score(
-            query_for_search,
-            k=cfg.similarity_k,
-        )
     except Exception as e:
         logger.error(f"Retriever error: {e}")
         traceback.print_exc()
@@ -922,1068 +748,377 @@ def retrieve_docs_once(query_for_search: str, original_query: str):
         return [], -1.0
     filtered_docs = []
-    best_score = -1.0
     for doc, raw_score in scored:
         sim = score_to_similarity(raw_score)
-        best_score = max(best_score, sim)
         q = doc.metadata.get("question", "")
         a = doc.metadata.get("answer", "")
         ov = lexical_overlap(original_query, f"{q} {a}")
-        if ov >= cfg.min_lexical_overlap and sim >= cfg.min_faiss_similarity:
             new_doc = Document(page_content=doc.page_content, metadata=dict(doc.metadata))
             new_doc.metadata["sim_score"] = sim
             new_doc.metadata["lexical_overlap"] = ov
             filtered_docs.append(new_doc)
     reranked = rerank_docs(original_query, filtered_docs, top_n=cfg.top_k_final)
     return reranked, best_score
-# -------------------------------
-# LANGGRAPH NODES
-# -------------------------------
-def retrieve_node(state: ChatState) -> ChatState:
-    query = state.get("expanded_query") or state["user_query"]
-    retrieval_attempts = int(state.get("retrieval_attempts", 0)) + 1
-    retrieval_mode = "expanded" if state.get("expanded_query") else "original"
-    docs, best_score = retrieve_docs_once(
-        query_for_search=query,
-        original_query=state["user_query"],
-    )
-    if not docs:
-        return {
-            "retrieved_docs": [],
-            "best_score": best_score,
-            "used_context": False,
-            "context": "",
-            "retrieval_attempts": retrieval_attempts,
-            "retrieval_mode": retrieval_mode,
-        }
-    return {
-        "retrieved_docs": docs,
-        "best_score": best_score,
-        "used_context": True,
-        "context": build_context_string(docs, max_chars=cfg.max_context_chars),
-        "retrieval_attempts": retrieval_attempts,
-        "retrieval_mode": retrieval_mode,
-    }
-def should_retry_retrieval(state: ChatState) -> str:
-    used_context = state.get("used_context", False)
-    best_score = state.get("best_score", -1.0)
-    attempts = int(state.get("retrieval_attempts", 0))
-    if used_context and best_score >= cfg.min_faiss_similarity:
-        return "local_reasoning"
-    if not cfg.enable_query_expansion:
-        return "local_reasoning"
-    if attempts >= 2:
-        return "local_reasoning"
-    return "expand_query"
-def expand_query_node(state: ChatState) -> ChatState:
-    expanded = run_query_expansion(state["user_query"])
-    if not expanded.strip():
-        expanded = state["user_query"]
-    return {"expanded_query": expanded}
-def local_reasoning_node(state: ChatState) -> ChatState:
-    context = state.get("context", "").strip()
-    if not context:
-        return {"reasoning_draft": "INSUFFICIENT_EVIDENCE"}
-    reasoning = run_local_reasoner(state["user_query"], context)
-    return {"reasoning_draft": reasoning}
-def generate_node(state: ChatState) -> ChatState:
-    context = state.get("context", "").strip()
-    reasoning = state.get("reasoning_draft", "INSUFFICIENT_EVIDENCE")
-    history = state.get("chat_history", [])
-    if not context:
-        return {"final_answer": "I could not find sufficiently relevant evidence in the RAG database for this question."}
-    answer = run_deepseek_summary(
-        user_query=state["user_query"],
-        context=context,
-        reasoning_draft=reasoning,
-        chat_history=history,
-    )
-    return {"final_answer": answer}
-def validate_node(state: ChatState) -> ChatState:
-    context = state.get("context", "").strip()
-    answer = state.get("final_answer", "").strip()
-    best_score = state.get("best_score", -1.0)
-    docs = state.get("retrieved_docs", [])
-    if not context or not answer:
-        return {"validation_status": "INSUFFICIENT_EVIDENCE: missing context or answer"}
-    if strong_retrieval(best_score, docs):
-        return {"validation_status": "SUPPORTED (validator skipped due to strong retrieval)"}
-    verdict = run_validator(context, answer)
-    if verdict.startswith("SUPPORTED"):
-        return {"validation_status": verdict}
-    if verdict.startswith("PARTLY_UNSUPPORTED"):
-        return {
-            "validation_status": verdict,
-            "final_answer": answer + "\n\nEvidence limits: some parts may not be fully supported by the retrieved evidence."
-        }
-    if verdict.startswith("INSUFFICIENT_EVIDENCE"):
-        return {
-            "validation_status": verdict,
-            "final_answer": answer + "\n\nEvidence limits: the retrieved evidence was weak or only partially relevant."
         }
-    return {"validation_status": verdict}
-def finalize_node(state: ChatState) -> ChatState:
-    answer = strip_bad_sections(state.get("final_answer", ""))
-    if not answer:
-        answer = "I could not generate an answer."
-    return {"final_answer": answer}
-# -------------------------------
-# GRAPH
-# -------------------------------
-builder = StateGraph(ChatState)
-builder.add_node("retrieve", retrieve_node)
-builder.add_node("expand_query", expand_query_node)
-builder.add_node("local_reasoning", local_reasoning_node)
-builder.add_node("generate", generate_node)
-builder.add_node("validate", validate_node)
-builder.add_node("finalize", finalize_node)
-builder.add_edge(START, "retrieve")
-builder.add_conditional_edges(
-    "retrieve",
-    should_retry_retrieval,
-    {
-        "expand_query": "expand_query",
-        "local_reasoning": "local_reasoning",
-    }
-)
-builder.add_edge("expand_query", "retrieve")
-builder.add_edge("local_reasoning", "generate")
-builder.add_edge("generate", "validate")
-builder.add_edge("validate", "finalize")
-builder.add_edge("finalize", END)
-graph = builder.compile()
-logger.info("LangGraph compiled.")
-# -------------------------------
-# FORMATTING HELPERS
-# -------------------------------
-def format_sources_minimal(result: Optional[Dict], chat_mode: str = "ecg_rag") -> str:
-    if chat_mode == "normal_chat":
-        return "## Retrieved Sources\n\nNormal chat mode is active. No ECG evidence retrieval used."
-    if not result:
-        return "## Retrieved Sources\n\nNo sources yet."
-    docs = result.get("retrieved_docs", [])
-    best_score = result.get("best_score", -1.0)
-    if not docs:
-        return (
-            "## Retrieved Sources\n\n"
-            "No sufficiently relevant evidence retrieved.\n\n"
-            f"**Best score:** `{best_score:.3f}`"
-        )
-    lines = [
-        "## Retrieved Sources",
-        f"**Best score:** `{best_score:.3f}`",
-        "",
-    ]
-    for i, d in enumerate(docs, 1):
-        question = d.metadata.get("question", "")
-        answer = d.metadata.get("answer", "")
-        similarity = d.metadata.get("sim_score", "N/A")
-        preview = answer[:210].strip()
-        if len(answer) > 210:
-            preview += "..."
-        lines.extend([
-            f"### Evidence {i}",
-            f"- **Question:** {question}",
-            f"- **Similarity:** `{similarity}`",
-            f"- **Preview:** {preview}",
-            "",
-        ])
-    return "\n".join(lines)
-def format_debug_text(result: Optional[Dict], chat_mode: str = "ecg_rag") -> str:
-    if chat_mode == "normal_chat":
-        return "MODE: normal_chat\nNo retrieval/debug evidence used."
-    if not result:
-        return "No debug result yet."
-    return f"""
-BEST SCORE: {result.get('best_score', -1.0)}
-USED CONTEXT: {result.get('used_context', False)}
-RETRIEVAL ATTEMPTS: {result.get('retrieval_attempts', 0)}
-RETRIEVAL MODE: {result.get('retrieval_mode', 'N/A')}
-VALIDATION STATUS: {result.get('validation_status', 'N/A')}
------ CONTEXT -----
-{result.get('context', '')}
------ LOCAL REASONING DRAFT -----
-{result.get('reasoning_draft', '')}
-""".strip()
-# -------------------------------
 # UI HELPERS
-# -------------------------------
 CUSTOM_CSS = """
-:root {
-    --bg-main: #07111f;
-    --bg-soft: #0b1728;
-    --card: rgba(10, 19, 35, 0.86);
-    --card-2: rgba(14, 25, 43, 0.94);
-    --border: rgba(148, 163, 184, 0.16);
-    --text: #e5eefb;
-    --muted: #94a3b8;
-    --primary: #7c3aed;
-    --primary-2: #2563eb;
-    --success: #10b981;
-}
 html, body, .gradio-container {
     margin: 0 !important;
     padding: 0 !important;
-    min-height: 100%;
-    background:
-        radial-gradient(circle at top left, rgba(124,58,237,0.22), transparent 28%),
-        radial-gradient(circle at top right, rgba(37,99,235,0.18), transparent 24%),
-        linear-gradient(180deg, #050b16 0%, #091321 100%);
-    color: var(--text);
 }
 .gradio-container {
-    max-width: 100% !important;
-    padding: 12px !important;
-}
-footer {
-    visibility: hidden;
 }
-.top-card {
-    border: 1px solid var(--border);
-    background: linear-gradient(135deg, rgba(11,23,40,0.95), rgba(18,31,56,0.92));
-    border-radius: 22px;
     padding: 16px;
     margin-bottom: 12px;
-    box-shadow: 0 14px 40px rgba(0,0,0,0.20);
 }
-.hero-title {
-    font-size: 1.6rem;
     font-weight: 800;
-    color: #f8fbff;
     margin-bottom: 6px;
-    line-height: 1.15;
 }
-.hero-subtitle {
-    color: #cbd5e1;
     font-size: 0.95rem;
-    line-height: 1.5;
-}
-.badges {
-    display: flex;
-    gap: 8px;
-    flex-wrap: wrap;
-    margin-top: 12px;
-}
-.badge {
-    display: inline-flex;
-    align-items: center;
-    gap: 6px;
-    padding: 6px 10px;
-    border-radius: 999px;
-    font-size: 11px;
-    color: #e6eefc;
-    border: 1px solid rgba(255,255,255,0.12);
-    background: rgba(255,255,255,0.06);
-}
-.panel-wrap {
-    border: 1px solid var(--border);
-    background: linear-gradient(180deg, rgba(10,19,35,0.96), rgba(7,14,26,0.94));
-    border-radius: 20px;
-    padding: 12px;
-    box-shadow: 0 16px 45px rgba(0,0,0,0.22);
 }
 #chatbot {
-    height: min(62vh, 640px) !important;
-    min-height: 360px !important;
     border-radius: 18px !important;
-    border: 1px solid var(--border) !important;
-    overflow: hidden !important;
-    box-shadow: 0 14px 40px rgba(0,0,0,0.26) !important;
 }
-.status-card {
-    padding: 12px 14px;
     border-radius: 16px;
-    background: linear-gradient(135deg, #0f172a 0%, #172554 100%);
-    color: #f9fafb;
-    font-size: 14px;
-    border: 1px solid rgba(255,255,255,0.12);
-    box-shadow: 0 10px 30px rgba(0,0,0,0.2);
-}
-.muted {
-    color: #a5b4fc;
-    font-size: 12px;
 }
-.blink-dots {
-    font-size: 22px;
-    font-weight: 800;
     letter-spacing: 4px;
     animation: blinkDots 1s steps(1, end) infinite;
-    display: inline-block;
-    padding: 2px 0;
 }
 @keyframes blinkDots {
     0% { opacity: 1; }
-    50% { opacity: 0.15; }
     100% { opacity: 1; }
 }
 textarea, .gr-textbox textarea {
-    border-radius: 16px !important;
-    font-size: 15px !important;
-}
-.gr-textbox label, .gr-markdown, .gr-button {
-    font-size: 14px !important;
 }
 button {
     border-radius: 14px !important;
     min-height: 44px !important;
     font-weight: 600 !important;
 }
-.mobile-stack {
-    display: flex;
-    flex-direction: column;
-    gap: 12px;
-}
-.mobile-scroll {
-    max-height: 34vh;
-    overflow-y: auto;
-}
-.command-note {
-    color: #cbd5e1;
-    font-size: 0.88rem;
-    line-height: 1.45;
-}
-.mode-note {
-    color: #cbd5e1;
-    font-size: 0.88rem;
-    margin-top: 6px;
-}
-@media (max-width: 1024px) {
-    .gradio-container { padding: 10px !important; }
-    .hero-title { font-size: 1.45rem; }
-    .hero-subtitle { font-size: 0.92rem; }
-    #chatbot { height: 56vh !important; }
-}
-@media (max-width: 768px) {
-    .gradio-container { padding: 8px !important; }
-    .top-card { padding: 14px; border-radius: 18px; }
-    .hero-title { font-size: 1.28rem; }
-    .hero-subtitle { font-size: 0.88rem; line-height: 1.45; }
-    .badge { font-size: 10px; padding: 5px 8px; }
-    .panel-wrap { padding: 10px; border-radius: 16px; }
-    #chatbot {
-        height: 52vh !important;
-        min-height: 320px !important;
-        border-radius: 16px !important;
-    }
-    button { width: 100% !important; }
-    .mobile-scroll { max-height: 240px; }
-}
-@media (max-width: 480px) {
-    .hero-title { font-size: 1.15rem; }
-    .hero-subtitle { font-size: 0.83rem; }
-    #chatbot {
-        height: 50vh !important;
-        min-height: 300px !important;
-    }
-    textarea, .gr-textbox textarea { font-size: 14px !important; }
-}
 """
-def hero_html() -> str:
     return """
-    <div class="top-card">
-        <div class="hero-title">🫀 Mr Cardio</div>
-        <div class="hero-subtitle">
-            ECG and cardiology specialist chatbot with automatic mode switching,
-            evidence retrieval, local ECG reasoning, grounded summaries, and normal chat mode.
-        </div>
-        <div class="badges">
-            <div class="badge">ECG RAG</div>
-            <div class="badge">Normal Chat</div>
-            <div class="badge">FAISS Retrieval</div>
-            <div class="badge">LoRA Adapter</div>
-            <div class="badge">Validated Output</div>
         </div>
     </div>
     """
 def thinking_html(stage: str) -> str:
-    icon = "⏳"
-    subtitle = "Retrieval → reasoning → grounded answer"
-    if "switch" in stage.lower() or "activating" in stage.lower() or "updating ui" in stage.lower():
-        icon = "⚡"
-        subtitle = "Updating mode and interface"
     return f"""
-    <div class="status-card">
-        <div style="display:flex;align-items:center;gap:12px;">
-            <div style="font-size:19px;">{icon}</div>
-            <div>
-                <div style="font-weight:700;">{stage}</div>
-                <div class="muted">{subtitle}</div>
-                <div class="blink-dots">...</div>
-            </div>
-        </div>
     </div>
     """
-def initialize_session():
-    return {
-        "chat_history": [],
-        "last_result": None,
-        "chat_mode": "ecg_rag",
-    }
-def add_assistant_placeholder(history, text="..."):
     history = history or []
-    history.append({
-        "role": "assistant",
-        "content": text,
-        "metadata": {"title": "Thinking"}
-    })
     return history
-def update_last_assistant_message(history, text, title=None):
     history = history or []
     if not history or history[-1]["role"] != "assistant":
-        msg = {"role": "assistant", "content": text}
-        if title:
-            msg["metadata"] = {"title": title}
-        history.append(msg)
         return history
-    history[-1] = {"role": "assistant", "content": text}
-    if title:
-        history[-1]["metadata"] = {"title": title}
     return history
-def user_submit(user_message, chat_ui_history):
-    chat_ui_history = chat_ui_history or []
     user_message = (user_message or "").strip()
     if not user_message:
-        return "", chat_ui_history
-    chat_ui_history.append({"role": "user", "content": user_message})
-    return "", chat_ui_history
-def set_chat_mode(mode_value: str, session_state: Dict):
-    if session_state is None:
-        session_state = initialize_session()
-    session_state["chat_mode"] = mode_value
-    return session_state
-def get_mode_label(session_state: Dict) -> str:
-    mode = (session_state or {}).get("chat_mode", "ecg_rag")
-    if mode == "normal_chat":
-        return """
-        <div class="mode-note">
-            <b>Mode:</b> Normal Chat
-        </div>
-        """
-    return """
-    <div class="mode-note">
-        <b>Mode:</b> ECG &amp; Cardiology
-        <br>
-        <span style="color:#93c5fd;">Medical / ECG / Cardiology specialist mode active</span>
-    </div>
-    """
-# -------------------------------
-# CORE CHAT
-# -------------------------------
-def run_chat_turn(user_message: str, memory_state: Dict) -> Dict:
-    if memory_state is None:
-        memory_state = initialize_session()
-    chat_mode = memory_state.get("chat_mode", "ecg_rag")
-    if chat_mode == "normal_chat":
-        answer = run_normal_chat(
-            user_query=user_message,
-            chat_history=memory_state["chat_history"]
-        )
-        result = {
-            "final_answer": answer,
-            "best_score": -1.0,
-            "used_context": False,
-            "validation_status": "NORMAL_CHAT_MODE",
-            "retrieved_docs": [],
-            "context": "",
-            "reasoning_draft": "",
-            "retrieval_attempts": 0,
-            "retrieval_mode": "none",
-        }
-    else:
-        state_in = {
-            "user_query": user_message,
-            "chat_history": memory_state["chat_history"],
-            "retrieval_attempts": 0,
-        }
-        try:
-            result = graph.invoke(state_in)
-        except Exception as e:
-            logger.error(f"Graph invocation error: {e}")
-            traceback.print_exc()
-            result = {
-                "final_answer": f"I hit a runtime error while processing the request: {e}",
-                "best_score": -1.0,
-                "used_context": False,
-                "validation_status": "ERROR",
-                "retrieved_docs": [],
-                "context": "",
-                "reasoning_draft": "",
-                "retrieval_attempts": 0,
-                "retrieval_mode": "error",
-            }
-    answer = result.get("final_answer", "").strip() or "I could not generate an answer."
-    best_score = result.get("best_score", -1.0)
-    validation_status = result.get("validation_status", "N/A")
-    confidence = compute_confidence(result) if chat_mode == "ecg_rag" else 1.0
-    answer_with_footer = (
-        f"{answer}\n\n---\n"
-        f"📊 mode={chat_mode} | confidence={confidence:.2f} | best_score={best_score:.3f} | validation={validation_status}"
-    )
-    memory_state["chat_history"].append({"role": "user", "content": user_message})
-    memory_state["chat_history"].append({"role": "assistant", "content": answer})
-    memory_state["chat_history"] = memory_state["chat_history"][-12:]
-    memory_state["last_result"] = result
-    return {
-        "answer": answer_with_footer,
-        "memory_state": memory_state,
-        "sources_markdown": format_sources_minimal(result, chat_mode=chat_mode),
-        "debug_text": format_debug_text(result, chat_mode=chat_mode),
-    }
-def bot_respond_stream(chat_ui_history, session_state):
-    global vectorstore
     if session_state is None:
         session_state = initialize_session()
-    if not chat_ui_history:
-        yield (
-            chat_ui_history,
-            session_state,
-            "## Retrieved Sources\n\nNo sources yet.",
-            "No debug result yet.",
-            "",
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        return
-    user_message = str(chat_ui_history[-1]["content"]).strip()
-    chat_mode = session_state.get("chat_mode", "ecg_rag")
-    # ---------------------------------
-    # AUTO MODE SWITCH
-    # ---------------------------------
-    requested_mode = detect_mode_switch_request(user_message)
-    if requested_mode and requested_mode != chat_mode:
-        session_state["chat_mode"] = requested_mode
-        chat_ui_history = add_assistant_placeholder(chat_ui_history, text="...")
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(session_state.get("last_result"), chat_mode=requested_mode),
-            format_debug_text(session_state.get("last_result"), chat_mode=requested_mode),
-            thinking_html(
-                f"Switching to {'ECG & Cardiology Mode' if requested_mode == 'ecg_rag' else 'Normal Chat Mode'}"
-            ),
-            get_mode_label(session_state),
-            requested_mode,
-        )
-        time.sleep(cfg.blink_stage_1)
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(session_state.get("last_result"), chat_mode=requested_mode),
-            format_debug_text(session_state.get("last_result"), chat_mode=requested_mode),
-            thinking_html("Updating UI"),
-            get_mode_label(session_state),
-            requested_mode,
-        )
-        time.sleep(cfg.blink_stage_2)
-        final_switch_text = mode_switch_message(requested_mode)
-        if cfg.enable_typewriter_stream:
-            for partial in stream_text(final_switch_text, step=90):
-                chat_ui_history = update_last_assistant_message(
-                    chat_ui_history,
-                    partial,
-                    title="Mode Update"
-                )
-                yield (
-                    chat_ui_history,
-                    session_state,
-                    format_sources_minimal(session_state.get("last_result"), chat_mode=requested_mode),
-                    format_debug_text(session_state.get("last_result"), chat_mode=requested_mode),
-                    "",
-                    get_mode_label(session_state),
-                    requested_mode,
-                )
-        chat_ui_history = update_last_assistant_message(
-            chat_ui_history,
-            final_switch_text,
-            title="Mode Update"
-        )
-        session_state["chat_history"].append({"role": "user", "content": user_message})
-        session_state["chat_history"].append({"role": "assistant", "content": final_switch_text})
-        session_state["chat_history"] = session_state["chat_history"][-12:]
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(session_state.get("last_result"), chat_mode=requested_mode),
-            format_debug_text(session_state.get("last_result"), chat_mode=requested_mode),
-            "",
-            get_mode_label(session_state),
-            requested_mode,
-        )
         return
-    if user_message == "/sources":
-        result = session_state.get("last_result")
-        chat_ui_history.append({
-            "role": "assistant",
-            "content": format_sources_minimal(result, chat_mode=chat_mode),
-            "metadata": {"title": "Sources"}
-        })
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(result, chat_mode=chat_mode),
-            format_debug_text(result, chat_mode=chat_mode),
-            "",
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        return
-    if user_message == "/debug":
-        result = session_state.get("last_result")
-        chat_ui_history.append({
-            "role": "assistant",
-            "content": format_debug_text(result, chat_mode=chat_mode),
-            "metadata": {"title": "Debug"}
-        })
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(result, chat_mode=chat_mode),
-            format_debug_text(result, chat_mode=chat_mode),
-            "",
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        return
-    if user_message == "/rebuild":
-        if not cfg.allow_rebuild_vectorstore:
-            chat_ui_history.append({
-                "role": "assistant",
-                "content": "Vector store rebuild is disabled on this Space.",
-                "metadata": {"title": "Restricted"}
-            })
-            yield (
-                chat_ui_history,
-                session_state,
-                format_sources_minimal(session_state.get("last_result"), chat_mode=chat_mode),
-                format_debug_text(session_state.get("last_result"), chat_mode=chat_mode),
-                "",
-                get_mode_label(session_state),
-                session_state.get("chat_mode", "ecg_rag"),
-            )
-            return
-        chat_ui_history = add_assistant_placeholder(chat_ui_history)
-        yield (
-            chat_ui_history,
-            session_state,
-            "",
-            "",
-            thinking_html("Rebuilding vector store"),
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        time.sleep(cfg.blink_stage_1)
-        chat_ui_history = update_last_assistant_message(
-            chat_ui_history,
-            "Rebuilding vector store and reloading embeddings...",
-            title="Maintenance"
-        )
-        yield (
-            chat_ui_history,
-            session_state,
-            "",
-            "",
-            thinking_html("Rebuilding vector store"),
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        build_vectorstore()
-        vectorstore = load_vectorstore()
-        chat_ui_history = update_last_assistant_message(
-            chat_ui_history,
-            "✅ Vector store rebuilt and reloaded.",
-            title="Done"
-        )
-        yield (
-            chat_ui_history,
-            session_state,
-            format_sources_minimal(session_state.get("last_result"), chat_mode=chat_mode),
-            format_debug_text(session_state.get("last_result"), chat_mode=chat_mode),
-            "",
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        return
-    chat_ui_history = add_assistant_placeholder(chat_ui_history, text="...")
-    yield (
-        chat_ui_history,
-        session_state,
-        "",
-        "",
-        thinking_html("Starting"),
-        get_mode_label(session_state),
-        session_state.get("chat_mode", "ecg_rag"),
-    )
-    time.sleep(cfg.blink_stage_1)
-    if chat_mode == "normal_chat":
-        yield (
-            chat_ui_history,
-            session_state,
-            "",
-            "",
-            thinking_html("Generating normal chat reply"),
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        time.sleep(cfg.blink_stage_2)
     else:
-        yield (
-            chat_ui_history,
-            session_state,
-            "",
-            "",
-            thinking_html("Retrieving evidence"),
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        time.sleep(cfg.blink_stage_2)
-        yield (
-            chat_ui_history,
-            session_state,
-            "",
-            "",
-            thinking_html("Running ECG adapter reasoning"),
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-        time.sleep(cfg.blink_stage_3)
-    out = run_chat_turn(user_message, session_state)
-    yield (
-        chat_ui_history,
-        session_state,
-        out["sources_markdown"],
-        out["debug_text"],
-        thinking_html("Generating grounded summary" if chat_mode == "ecg_rag" else "Finishing reply"),
-        get_mode_label(session_state),
-        session_state.get("chat_mode", "ecg_rag"),
-    )
-    time.sleep(cfg.blink_before_answer)
     if cfg.enable_typewriter_stream:
-        for partial in stream_text(out["answer"], step=120):
-            chat_ui_history = update_last_assistant_message(
-                chat_ui_history,
-                partial,
-                title="Answer"
-            )
-            yield (
-                chat_ui_history,
-                session_state,
-                out["sources_markdown"],
-                out["debug_text"],
-                "",
-                get_mode_label(session_state),
-                session_state.get("chat_mode", "ecg_rag"),
-            )
-    chat_ui_history = update_last_assistant_message(
-        chat_ui_history,
-        out["answer"],
-        title="Answer"
-    )
-    yield (
-        chat_ui_history,
-        out["memory_state"],
-        out["sources_markdown"],
-        out["debug_text"],
-        "",
-        get_mode_label(out["memory_state"]),
-        out["memory_state"].get("chat_mode", "ecg_rag"),
-    )
-def clear_chat():
-    st = initialize_session()
-    return (
-        [],
-        st,
-        "## Retrieved Sources\n\nNo sources yet.",
-        "No debug result yet.",
-        "",
-        get_mode_label(st),
-        st.get("chat_mode", "ecg_rag"),
-    )
-def rebuild_from_button(session_state, chatbot_history):
-    global vectorstore
-    if session_state is None:
-        session_state = initialize_session()
-    chat_mode = session_state.get("chat_mode", "ecg_rag")
-    if not cfg.allow_rebuild_vectorstore:
-        chatbot_history = chatbot_history or []
-        chatbot_history.append({
-            "role": "assistant",
-            "content": "Vector store rebuild is disabled on this Space.",
-            "metadata": {"title": "Restricted"}
-        })
-        return (
-            chatbot_history,
-            session_state,
-            format_sources_minimal(session_state.get("last_result"), chat_mode=chat_mode),
-            format_debug_text(session_state.get("last_result"), chat_mode=chat_mode),
-            "",
-            get_mode_label(session_state),
-            session_state.get("chat_mode", "ecg_rag"),
-        )
-    build_vectorstore()
-    vectorstore = load_vectorstore()
-    chatbot_history = chatbot_history or []
-    chatbot_history.append({
-        "role": "assistant",
-        "content": "✅ Vector store rebuilt and reloaded.",
-        "metadata": {"title": "Done"}
-    })
-    return (
-        chatbot_history,
-        session_state,
-        format_sources_minimal(session_state.get("last_result"), chat_mode=chat_mode),
-        format_debug_text(session_state.get("last_result"), chat_mode=chat_mode),
-        "",
-        get_mode_label(session_state),
-        session_state.get("chat_mode", "ecg_rag"),
-    )
-# -------------------------------
 # APP
-# -------------------------------
-with gr.Blocks(
-    title="Medical CSV RAG Chatbot",
-    css=CUSTOM_CSS,
-    theme=gr.themes.Soft(
-        primary_hue="indigo",
-        secondary_hue="blue",
-        neutral_hue="slate",
-        radius_size="lg",
-        text_size="md",
-    ),
-) as demo:
-    gr.HTML(hero_html())
     session_state = gr.State(initialize_session())
-    with gr.Column(elem_classes=["mobile-stack"]):
-        with gr.Group(elem_classes=["panel-wrap"]):
-            mode_selector = gr.Radio(
-                choices=[
-                    ("ECG RAG Mode", "ecg_rag"),
-                    ("Normal Chat Mode", "normal_chat"),
-                ],
-                value="ecg_rag",
-                label="Chat Mode",
-                interactive=True,
-            )
-            mode_status = gr.HTML(get_mode_label(initialize_session()))
-            chatbot = gr.Chatbot(
-                label="Clinical Chat",
-                height=640,
-                elem_id="chatbot",
-                type="messages",
-                show_copy_button=True,
-                bubble_full_width=False,
-                avatar_images=(None, None),
-            )
-            user_box = gr.Textbox(
-                label="Ask a question",
-                placeholder="e.g. What are the ECG findings in hyperkalemia? or type 'switch to ECG mode'",
-                lines=2,
-                autofocus=True,
-            )
-            status_html = gr.HTML("")
-            with gr.Row():
-                send_btn = gr.Button("Send", variant="primary")
-                clear_btn = gr.Button("Clear")
-                rebuild_btn = gr.Button("Rebuild Store")
-            gr.HTML(
-                """
-                <div class="command-note">
-                    Commands: <code>/sources</code>, <code>/debug</code>, <code>/rebuild</code>
-                </div>
-                """
-            )
-        with gr.Accordion("Retrieved Sources", open=False):
-            with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
-                sources_panel = gr.Markdown("## Retrieved Sources\n\nNo sources yet.")
-        if cfg.show_debug_panel:
-            with gr.Accordion("Debug Panel", open=False):
-                with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
-                    debug_panel = gr.Textbox(
-                        label="Debug",
-                        value="No debug result yet.",
-                        lines=18,
-                        max_lines=28,
-                        interactive=False,
-                    )
-        else:
-            debug_panel = gr.Textbox(visible=False, value="")
-    mode_selector.change(
-        fn=set_chat_mode,
-        inputs=[mode_selector, session_state],
-        outputs=[session_state],
-        queue=False,
-    ).then(
-        fn=get_mode_label,
-        inputs=[session_state],
-        outputs=[mode_status],
-        queue=False,
-    )
     submit_event = user_box.submit(
         fn=user_submit,
@@ -1991,41 +1126,33 @@ with gr.Blocks(
         outputs=[user_box, chatbot],
         queue=True,
     )
     submit_event.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
-        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html, mode_status, mode_selector],
         queue=True,
     )
-    send_click = send_btn.click(
         fn=user_submit,
         inputs=[user_box, chatbot],
         outputs=[user_box, chatbot],
         queue=True,
     )
-    send_click.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
-        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html, mode_status, mode_selector],
         queue=True,
     )
     clear_btn.click(
         fn=clear_chat,
         inputs=[],
-        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html, mode_status, mode_selector],
         queue=False,
     )
-    rebuild_btn.click(
-        fn=rebuild_from_button,
-        inputs=[session_state, chatbot],
-        outputs=[chatbot, session_state, sources_panel, debug_panel, status_html, mode_status, mode_selector],
-        queue=True,
-    )
 demo.queue(default_concurrency_limit=1)
@@ -2034,4 +1161,4 @@ if __name__ == "__main__":
         debug=cfg.launch_debug,
         server_name=cfg.server_name,
         server_port=cfg.server_port,
-    )

 import os
 import re
 import time
+import json
+import queue
 import logging
+import threading
+import traceback
+from typing import List, Dict, TypedDict, Optional, Tuple
 from dataclasses import dataclass, field
 import torch
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
 # ============================================================
+# AGENTIC ECG CHATBOT
+# - Starts as normal chatbot
+# - Detects ECG / cardiology intent automatically
+# - Retrieves from CSV RAG store only for ECG questions
+# - Runs local ECG adapter reasoning
+# - Runs remote evidence summarizer
+# - Runs remote clinical-composer agent
+# - Merges both into a final long answer
+# - Simple UI with Send / Clear
+# - Visible thinking status + progress logs
 # ============================================================
+raw_omp = str(os.getenv("OMP_NUM_THREADS", "1")).strip()
+os.environ["OMP_NUM_THREADS"] = raw_omp if re.fullmatch(r"\d+", raw_omp) else "1"
+# ============================================================
 # LOGGING
+# ============================================================
 logging.basicConfig(
     level=logging.INFO,
+    format="%(asctime)s | %(levelname)s | %(message)s"
 )
+logger = logging.getLogger("agentic_ecg_chatbot")
+# ============================================================
 # CONFIG
+# ============================================================
 @dataclass
 class Config:
+    base_model_path: str = os.getenv("BASE_MODEL_PATH", "meta-llama/Llama-3.1-8B-Instruct")
+    adapter_dir: str = os.getenv("ADAPTER_DIR", "adapter_refined_v10")
+    data_csv: str = os.getenv("DATA_CSV", "RAGmaterials/ECG_RAG_only_clean.csv")
+    rag_dir: str = os.getenv("RAG_DIR", "RAGmaterials")
     vectorstore_dir: str = field(init=False)
     hf_token: str = os.getenv("HF_TOKEN", "")
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
+    embed_model_name: str = os.getenv("EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
+    similarity_k: int = int(os.getenv("SIMILARITY_K", "10"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
+    max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5500"))
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
+    max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "220"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
+    min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.06"))
+    min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.18"))
+    deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.15"))
+    deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "900"))
+    use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
     enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
+    enable_warmup: bool = os.getenv("ENABLE_WARMUP", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
     def __post_init__(self):
         self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
         os.makedirs(self.rag_dir, exist_ok=True)
         if not self.deepseek_api_key:
+            raise ValueError("Missing DEEPSEEK_API_KEY in environment / Space secrets.")
         if not self.hf_token:
+            raise ValueError("Missing HF_TOKEN in environment / Space secrets.")
         for path, name in [
             (self.adapter_dir, "Adapter directory"),
 logger.info("Configuration loaded.")
+# ============================================================
 # PROMPTS
+# ============================================================
+INTENT_CLASSIFIER_SYSTEM = """
+You classify user messages.
+Return only one label:
+- ECG_RAG
+- NORMAL_CHAT
+Choose ECG_RAG if the message is about ECG, EKG, cardiology, arrhythmia, heart rhythm, cardiac conduction,
+ST changes, QRS, PR, QT, tachycardia, bradycardia, atrial fibrillation, flutter, bundle branch block,
+heart block, hyperkalemia ECG changes, or similar cardiology interpretation.
+Otherwise return NORMAL_CHAT.
+""".strip()
+QUERY_EXPANSION_SYSTEM = """
+You expand ECG and cardiology retrieval queries.
 Rules:
+1. Preserve the exact user intent.
+2. Add close cardiology / ECG synonyms and alternate wording.
+3. Do not answer the question.
+4. Output only the expanded retrieval query.
+""".strip()
+LOCAL_REASONING_SYSTEM = """
+You are a strict ECG and cardiology reasoning assistant.
+You are not the final answer generator.
+Use only the evidence provided.
+Do not invent facts.
+Output exactly in this format:
 KEY_FINDINGS:
 - ...
 LIMITS:
 - ...
+If evidence is insufficient, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
+RAG_SUMMARY_SYSTEM = """
+You are a clinical evidence summarizer.
+Write a well-structured answer grounded only in the provided evidence and reasoning draft.
+Do not use outside knowledge.
+Be accurate, conservative, and clinically clear.
+Output format:
+### Summary
+4 to 7 full sentences.
+### Key Evidence Points
+4 to 6 bullet points.
+### Clinical Interpretation
+2 to 4 bullet points if supported.
+### Evidence Limits
+State what is not established.
+If the evidence is too weak, output exactly:
 INSUFFICIENT_EVIDENCE
+""".strip()
+CLINICAL_COMPOSER_SYSTEM = """
+You are a second medical composition agent.
+Your job is to produce a longer, polished explanation from the same evidence and the same user question.
+You must stay faithful to the evidence.
+Do not add unsupported facts.
+Do not mention tools, prompts, or pipelines.
+Output format:
+### Direct Answer
+A direct answer in 2 to 3 sentences.
+### Expanded Explanation
+A longer explanation in 5 to 8 sentences.
+### Important Notes
+3 to 5 bullet points.
+### Remaining Uncertainty
+State what the evidence does not prove.
+If the evidence is too weak, output exactly:
 INSUFFICIENT_EVIDENCE
 """.strip()
+FINAL_MERGER_SYSTEM = """
+You are the final answer agent.
+You will receive:
+1. the user's question
+2. retrieved evidence
+3. a local ECG adapter reasoning draft
+4. summary agent output
+5. clinical composer output
+Write one final long-form answer.
 Rules:
+- Use only supported information.
+- Merge overlapping ideas cleanly.
+- Do not repeat the same point too many times.
+- Make the answer helpful, detailed, and readable.
+- Do not mention internal agents or processing steps.
+Output format:
+### Final Answer
+A detailed answer in 6 to 10 sentences.
+### Key Points
+4 to 6 bullets.
+### Clinical Perspective
+2 to 4 bullets if supported.
+### Limits
+A short honest limitations section.
+If evidence is weak, output exactly:
+INSUFFICIENT_EVIDENCE
 """.strip()
+NORMAL_CHAT_SYSTEM = """
+You are a helpful, friendly chatbot.
+Be conversational, clear, and useful.
+Answer normally.
+Do not mention hidden tools or internal systems.
+""".strip()
+# ============================================================
 # HELPERS
+# ============================================================
 def clean_text(x: str) -> str:
     x = str(x).replace("\x00", " ").strip()
     x = re.sub(r"\s+", " ", x)
 def strip_bad_sections(txt: str) -> str:
     t = str(txt).strip()
     t = re.sub(r"https?://\S+|www\.\S+", "", t).strip()
     return t
 def infer_tags(question: str, answer: str) -> List[str]:
+    # Tag a Q/A pair with every category in keyword_map that has at least one
+    # keyword present in the lower-cased "question answer" text. Tags come back
+    # in keyword_map order; an empty list means nothing matched.
     text = f"{question} {answer}".lower()
     tags: List[str] = []
     keyword_map = {
+        "ecg": ["ecg", "ekg", "qrs", "pr", "qt", "st elevation", "t wave", "arrhythmia", "tachycardia", "bradycardia"],
         "diagnosis": ["diagnosis", "diagnose", "criteria"],
+        "treatment": ["treat", "therapy", "management", "drug"],
+        "symptoms": ["symptom", "sign", "presentation"],
+        "etiology": ["cause", "caused by", "associated with", "risk factor"],
     }
+    # NOTE(review): `w in text` is substring containment, so short keywords
+    # such as "pr" or "sign" also match inside longer words ("present", "design").
     for tag, words in keyword_map.items():
         if any(w in text for w in words):
             tags.append(tag)
     return tags
     return len(q_words & t_words) / max(1, len(q_words))
 def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
+    # Render the tail of the chat history as "ROLE: content" lines, one per
+    # message, or return "[EMPTY]" when there is nothing to show.
+    # A falsy max_turns (None or 0) falls back to cfg.max_chat_history_turns.
+    max_turns = max_turns or cfg.max_chat_history_turns
+    # NOTE(review): the slice counts individual messages, not user/assistant pairs.
     items = chat_history[-max_turns:]
     if not items:
         return "[EMPTY]"
     return "\n".join([f"{m['role'].upper()}: {m['content']}" for m in items]).strip()
 def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
+    # Format retrieved documents into numbered evidence blocks joined by blank
+    # lines, staying within a character budget. A falsy max_chars falls back to
+    # cfg.max_context_chars.
+    max_chars = max_chars or cfg.max_context_chars
     blocks = []
     total = 0
     for i, d in enumerate(docs, 1):
         q = d.metadata.get("question", "")
         a = d.metadata.get("answer", "")
         tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
+        sim = d.metadata.get("sim_score", "N/A")
         block = f"""
 ==============================
 EVIDENCE_ID: {i}
 SOURCE_ID: {d.metadata.get('id')}
 SOURCE_QUESTION: {q}
 SOURCE_TAGS: {tags}
+SIMILARITY: {sim}
 EVIDENCE_TEXT:
 {a}
 ==============================
 """.strip()
+        # Stop at the first block that would overflow the budget; later blocks
+        # are not tried, and an oversized first block yields an empty string.
         if total + len(block) > max_chars:
             break
         blocks.append(block)
+        # +2 accounts for the "\n\n" separator added by the join below.
         total += len(block) + 2
     return "\n\n".join(blocks).strip()
+def stream_text(text: str, step: int = 120):
+    # Generator for a typewriter effect: yields ever-longer prefixes of `text`,
+    # growing by `step` characters per yield; the last yield is the full text.
+    # An empty `text` yields nothing.
     acc = ""
     for i in range(0, len(text), step):
         acc += text[i:i + step]
         yield acc
+# ============================================================
+# PROGRESS / LOGGING
+# ============================================================
+def new_progress_state() -> Dict:
+    return {"lines": []}
+def add_progress(progress_state: Dict, msg: str):
+    line = f"[{time.strftime('%H:%M:%S')}] {msg}"
+    logger.info(msg)
+    progress_state["lines"].append(line)
+    progress_state["lines"] = progress_state["lines"][-80:]
+def progress_text(progress_state: Dict) -> str:
+    lines = progress_state.get("lines", [])
+    return "\n".join(lines) if lines else "No progress yet."
+# ============================================================
+# ECG QUERY DETECTION
+# ============================================================
+ECG_REGEXES = [
+    r"\becg\b", r"\bekg\b", r"\bcardiology\b", r"\barrhythmia\b", r"\bheart rhythm\b",
+    r"\batrial fibrillation\b", r"\bafib\b", r"\bflutter\b", r"\bqrs\b", r"\bpr interval\b",
+    r"\bqt\b", r"\bst elevation\b", r"\bst depression\b", r"\bt wave\b", r"\bbradycardia\b",
+    r"\btachycardia\b", r"\bheart block\b", r"\bbundle branch block\b", r"\bhyperkalemia\b",
+]
+def detect_ecg_by_rules(text: str) -> bool:
+    text = str(text or "").lower().strip()
+    return any(re.search(p, text) for p in ECG_REGEXES)
+# ============================================================
 # EMBEDDINGS + VECTORSTORE
+# ============================================================
 logger.info("Loading embeddings...")
 embeddings = HuggingFaceEmbeddings(
     model_name=cfg.embed_model_name,
     model_kwargs={
         "device": "cuda" if torch.cuda.is_available() else "cpu",
+        "token": cfg.hf_token if cfg.hf_token else None,
     },
     encode_kwargs={"normalize_embeddings": True},
 )
                     "question": q,
                     "answer": a,
                     "tags": infer_tags(q, a),
+                },
             )
         )
 logger.info("Vectorstore ready.")
+# ============================================================
+# MODEL LOADING
+# ============================================================
 logger.info("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     cfg.base_model_path,
     use_fast=True,
+    token=cfg.hf_token if cfg.hf_token else None,
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 base_model.eval()
+logger.info("Loading ECG adapter...")
 reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
 reason_model.eval()
+logger.info("Loading remote LLM client...")
+remote_llm = ChatOpenAI(
+    model=cfg.deepseek_model,
+    api_key=cfg.deepseek_api_key,
+    base_url=cfg.deepseek_base_url,
+    temperature=cfg.deepseek_temperature,
+    max_tokens=cfg.deepseek_max_tokens,
+)
 def get_primary_model_device(model) -> torch.device:
     try:
         return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# ============================================================
+# LLM CALLS
+# ============================================================
+def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
+    try:
+        resp = remote_llm.invoke([
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ])
+        text = resp.content if hasattr(resp, "content") else str(resp)
+        text = strip_bad_sections(text)
+        return text if text.strip() else fallback
+    except Exception as e:
+        logger.error(f"Remote LLM error: {e}")
+        traceback.print_exc()
+        return fallback
+def classify_intent(user_query: str) -> str:
+    if detect_ecg_by_rules(user_query):
+        return "ECG_RAG"
+    result = llm_text(
+        INTENT_CLASSIFIER_SYSTEM,
+        f"USER_MESSAGE:\n{user_query}",
+        fallback="NORMAL_CHAT",
+    ).strip().upper()
+    return "ECG_RAG" if "ECG_RAG" in result else "NORMAL_CHAT"
+def run_query_expansion(user_query: str) -> str:
+    if not cfg.enable_query_expansion:
+        return user_query
+    prompt = f"USER_QUERY:\n{user_query}\n\nExpand this for ECG/cardiology retrieval."
+    expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
+    return expanded.strip() if expanded else user_query
 @torch.inference_mode()
 def run_local_reasoner(user_query: str, context: str) -> str:
     try:
         messages = [
             {"role": "system", "content": LOCAL_REASONING_SYSTEM},
+            {"role": "user", "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context or '[EMPTY]'}"},
         ]
         prompt = tokenizer.apply_chat_template(
         gen_ids = out[0, inputs["input_ids"].shape[1]:]
         text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
+        return strip_bad_sections(text) or "INSUFFICIENT_EVIDENCE"
     except Exception as e:
         logger.error(f"Local reasoner error: {e}")
         traceback.print_exc()
         return "INSUFFICIENT_EVIDENCE"
+def run_rag_summary(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
+    # Ask the remote LLM (RAG_SUMMARY_SYSTEM prompt) for an evidence-grounded summary.
+    # Blank context or draft is sent as the literal "[EMPTY]"; llm_text returns
+    # "INSUFFICIENT_EVIDENCE" when the call fails or the reply is blank.
     prompt = f"""
+CHAT_HISTORY:
+{history_to_text(chat_history)}
+USER_QUESTION:
+{user_query}
+RETRIEVED_EVIDENCE:
+{context if context.strip() else '[EMPTY]'}
+LOCAL_REASONING_DRAFT:
+{reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
+""".strip()
+    return llm_text(RAG_SUMMARY_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
+def run_clinical_composer(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
     prompt = f"""
 CHAT_HISTORY:
 {history_to_text(chat_history)}
 LOCAL_REASONING_DRAFT:
 {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
 """.strip()
+    return llm_text(CLINICAL_COMPOSER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
+def run_final_merger(user_query: str, context: str, reasoning_draft: str, summary_a: str, summary_b: str) -> str:
+    # Ask the remote LLM (FINAL_MERGER_SYSTEM prompt) to merge the evidence, the
+    # local reasoning draft and both agent outputs into one answer. Each blank
+    # input is sent as the literal "[EMPTY]"; llm_text returns
+    # "INSUFFICIENT_EVIDENCE" when the call fails or the reply is blank.
     prompt = f"""
+USER_QUESTION:
+{user_query}
+RETRIEVED_EVIDENCE:
 {context if context.strip() else '[EMPTY]'}
+LOCAL_ECG_REASONING:
+{reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
+SUMMARY_AGENT_OUTPUT:
+{summary_a if summary_a.strip() else '[EMPTY]'}
+CLINICAL_COMPOSER_OUTPUT:
+{summary_b if summary_b.strip() else '[EMPTY]'}
+""".strip()
+    return llm_text(FINAL_MERGER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
 def run_normal_chat(user_query: str, chat_history: List[Dict[str, str]]) -> str:
 USER_MESSAGE:
 {user_query}
 """.strip()
+    return llm_text(NORMAL_CHAT_SYSTEM, prompt, fallback="Sorry, I could not generate a response.")
+# ============================================================
 # WARMUP
+# ============================================================
 def warmup_models():
     logger.info("Warming up local reasoner...")
     try:
 EVIDENCE_ID: 1
 SOURCE_QUESTION: What are ECG findings in hyperkalemia?
 SOURCE_TAGS: ecg
+SIMILARITY: 0.9
 EVIDENCE_TEXT:
 Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
 ==============================
         logger.warning(f"Warmup failed: {e}")
+if cfg.enable_warmup:
+    warmup_models()
+# ============================================================
 # STATE
+# ============================================================
+# Typed view of the values produced during one chat turn. total=False makes
+# every key optional.
+# NOTE(review): the pipeline functions visible in this diff pass plain dicts
+# and do not reference this type; confirm whether it is still used.
+class AgentState(TypedDict, total=False):
     user_query: str
     chat_history: List[Dict[str, str]]
+    detected_mode: str
+    expanded_query: str
     retrieved_docs: List[Document]
     best_score: float
     context: str
+    local_reasoning: str
+    summary_agent: str
+    composer_agent: str
     final_answer: str
+# ============================================================
 # RETRIEVAL
+# ============================================================
+def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
+    top_n = top_n or cfg.top_k_final
+    q_words = set(re.findall(r"\w+", query.lower()))
+    scored = []
+    for d in docs:
+        question = d.metadata.get("question", "")
+        answer = d.metadata.get("answer", "")
+        tags = " ".join(d.metadata.get("tags", []))
+        text = f"{question} {answer} {tags}".lower()
+        t_words = set(re.findall(r"\w+", text))
+        overlap = len(q_words & t_words) / max(1, len(q_words))
+        question_boost = 0.20 if any(w in question.lower() for w in q_words) else 0.0
+        tag_boost = 0.10 if any(w in tags.lower() for w in q_words) else 0.0
+        sim_score = float(d.metadata.get("sim_score", 0.0))
+        final_score = overlap + question_boost + tag_boost + (0.35 * sim_score)
+        scored.append((d, final_score))
+    scored.sort(key=lambda x: x[1], reverse=True)
+    return [d for d, _ in scored[:top_n]]
+def retrieve_docs_once(query_for_search: str, original_query: str) -> Tuple[List[Document], float]:
+    # Run one vector search with `query_for_search`, filter and rerank the hits
+    # against `original_query`, and return (documents, best similarity).
+    # A failed search is logged and reported as ([], -1.0).
     try:
+        scored = vectorstore.similarity_search_with_score(query_for_search, k=cfg.similarity_k)
     except Exception as e:
         logger.error(f"Retriever error: {e}")
         traceback.print_exc()
         return [], -1.0
     filtered_docs = []
     for doc, raw_score in scored:
         sim = score_to_similarity(raw_score)
         q = doc.metadata.get("question", "")
         a = doc.metadata.get("answer", "")
         ov = lexical_overlap(original_query, f"{q} {a}")
+        # Keep a hit when it is strongly similar on its own (>= 0.45) or when it
+        # clears both the configured lexical-overlap and similarity floors.
+        if sim >= 0.45 or (ov >= cfg.min_lexical_overlap and sim >= cfg.min_faiss_similarity):
+            # Annotate a copy so the scores are not written into the stored document's metadata.
             new_doc = Document(page_content=doc.page_content, metadata=dict(doc.metadata))
             new_doc.metadata["sim_score"] = sim
             new_doc.metadata["lexical_overlap"] = ov
             filtered_docs.append(new_doc)
     reranked = rerank_docs(original_query, filtered_docs, top_n=cfg.top_k_final)
+    # Best similarity among the documents actually returned; -1.0 when none survived.
+    best_score = max((float(d.metadata.get("sim_score", -1.0)) for d in reranked), default=-1.0)
     return reranked, best_score
+def retrieve_docs(query: str) -> Tuple[List[Document], float, str]:
+    docs_a, score_a = retrieve_docs_once(query, query)
+    if not cfg.enable_query_expansion:
+        return docs_a, score_a, query
+    expanded = run_query_expansion(query)
+    docs_b, score_b = retrieve_docs_once(expanded, query)
+    merged = []
+    seen_ids = set()
+    for d in docs_a + docs_b:
+        doc_id = d.metadata.get("id")
+        if doc_id not in seen_ids:
+            seen_ids.add(doc_id)
+            merged.append(d)
+    merged = rerank_docs(query, merged, top_n=cfg.top_k_final)
+    best_score = max(score_a, score_b)
+    return merged, best_score, expanded
+# ============================================================
+# CORE AGENTIC PIPELINE
+# ============================================================
+def initialize_session() -> Dict:
+    return {
+        "chat_history": [],
+        "last_result": None,
+        "progress": new_progress_state(),
+    }
+def run_agentic_turn(user_query: str, session_state: Dict) -> Dict:
+    if session_state is None:
+        session_state = initialize_session()
+    progress = new_progress_state()
+    add_progress(progress, "User message received")
+    chat_history = session_state.get("chat_history", [])
+    add_progress(progress, "Detecting query type")
+    mode = classify_intent(user_query)
+    add_progress(progress, f"Detected mode: {mode}")
+    if mode == "NORMAL_CHAT":
+        add_progress(progress, "Running normal chat response")
+        answer = run_normal_chat(user_query, chat_history)
+        result = {
+            "mode": "normal_chat",
+            "final_answer": answer,
+            "retrieved_docs": [],
+            "best_score": -1.0,
+            "context": "",
+            "local_reasoning": "",
+            "summary_agent": "",
+            "composer_agent": "",
+            "progress_text": progress_text(progress),
         }
+    else:
+        add_progress(progress, "Running ECG retrieval")
+        docs, best_score, expanded_query = retrieve_docs(user_query)
+        add_progress(progress, f"Retrieved {len(docs)} document(s)")
+        add_progress(progress, f"Best score: {best_score:.3f}")
+        add_progress(progress, f"Expanded query: {expanded_query}")
+        context = build_context_string(docs)
+        if not context.strip():
+            add_progress(progress, "No strong ECG evidence found")
+            answer = "I could not find sufficiently relevant ECG evidence in the CSV knowledge base for this question."
+            result = {
+                "mode": "ecg_rag",
+                "final_answer": answer,
+                "retrieved_docs": docs,
+                "best_score": best_score,
+                "context": context,
+                "local_reasoning": "",
+                "summary_agent": "",
+                "composer_agent": "",
+                "progress_text": progress_text(progress),
+            }
+        else:
+            add_progress(progress, "Running local ECG adapter reasoning")
+            local_reasoning = run_local_reasoner(user_query, context)
+            add_progress(progress, "Running summary agent")
+            summary_agent = run_rag_summary(user_query, context, local_reasoning, chat_history)
+            add_progress(progress, "Running clinical composer agent")
+            composer_agent = run_clinical_composer(user_query, context, local_reasoning, chat_history)
+            add_progress(progress, "Running final merger agent")
+            final_answer = run_final_merger(user_query, context, local_reasoning, summary_agent, composer_agent)
+            if not final_answer.strip() or final_answer.strip() == "INSUFFICIENT_EVIDENCE":
+                final_answer = summary_agent if summary_agent.strip() else "INSUFFICIENT_EVIDENCE"
+            add_progress(progress, "Final answer ready")
+            result = {
+                "mode": "ecg_rag",
+                "final_answer": final_answer,
+                "retrieved_docs": docs,
+                "best_score": best_score,
+                "context": context,
+                "local_reasoning": local_reasoning,
+                "summary_agent": summary_agent,
+                "composer_agent": composer_agent,
+                "progress_text": progress_text(progress),
+            }
+    session_state["chat_history"].append({"role": "user", "content": user_query})
+    session_state["chat_history"].append({"role": "assistant", "content": result["final_answer"]})
+    session_state["chat_history"] = session_state["chat_history"][-12:]
+    session_state["last_result"] = result
+    session_state["progress"] = progress
+    return {"result": result, "session_state": session_state}
+# ============================================================
 # UI HELPERS
+# ============================================================
 CUSTOM_CSS = """
 html, body, .gradio-container {
     margin: 0 !important;
     padding: 0 !important;
+    background: #0b1220;
+    color: #e5e7eb;
 }
 .gradio-container {
+    max-width: 900px !important;
+    margin: 0 auto !important;
+    padding: 16px !important;
 }
+.simple-card {
+    border: 1px solid rgba(255,255,255,0.08);
+    background: #111827;
+    border-radius: 18px;
     padding: 16px;
     margin-bottom: 12px;
 }
+.app-title {
+    font-size: 1.4rem;
     font-weight: 800;
+    color: #f9fafb;
     margin-bottom: 6px;
 }
+.app-subtitle {
     font-size: 0.95rem;
+    color: #cbd5e1;
 }
 #chatbot {
+    min-height: 60vh !important;
     border-radius: 18px !important;
 }
+.status-box {
+    border: 1px solid rgba(255,255,255,0.08);
+    background: linear-gradient(180deg, #111827 0%, #172033 100%);
     border-radius: 16px;
+    padding: 12px 14px;
+    color: #f3f4f6;
 }
+.thinking-dots {
+    display: inline-block;
     letter-spacing: 4px;
+    font-weight: 800;
     animation: blinkDots 1s steps(1, end) infinite;
 }
 @keyframes blinkDots {
     0% { opacity: 1; }
+    50% { opacity: 0.2; }
     100% { opacity: 1; }
 }
 textarea, .gr-textbox textarea {
+    border-radius: 14px !important;
 }
 button {
     border-radius: 14px !important;
     min-height: 44px !important;
     font-weight: 600 !important;
 }
 """
+def header_html() -> str:
+    # Static HTML banner (title and subtitle) rendered at the top of the app;
+    # the CSS classes used here are defined in CUSTOM_CSS.
     return """
+    <div class="simple-card">
+        <div class="app-title">🫀 Agentic ECG Chatbot</div>
+        <div class="app-subtitle">
+            Starts as normal chat. If the question is ECG/cardiology-related, it automatically switches into ECG evidence mode,
+            retrieves from your CSV knowledge base, runs local ECG adapter reasoning, builds two summaries, and merges them into one long final answer.
         </div>
     </div>
     """
 def thinking_html(stage: str) -> str:
+    # Status box shown while a turn is processed: the stage label in bold
+    # followed by an animated "thinking" line.
+    # NOTE(review): `stage` is inserted without HTML escaping; the callers
+    # visible in this diff pass fixed literals only.
     return f"""
+    <div class="status-box">
+        <b>{stage}</b><br>
+        Model is thinking <span class="thinking-dots">...</span>
     </div>
     """
+def add_assistant_placeholder(history, text="Thinking..."):
+    # Append a temporary assistant message titled "Thinking" and return the list.
+    # A non-empty `history` is modified in place; None or an empty list is
+    # replaced by a new list.
     history = history or []
+    history.append({"role": "assistant", "content": text, "metadata": {"title": "Thinking"}})
     return history
+def update_last_assistant_message(history, text, title="Answer"):
+    # Replace the trailing assistant message with `text` under `title`; when the
+    # history is empty or ends with a non-assistant message, append one instead.
     history = history or []
     if not history or history[-1]["role"] != "assistant":
+        history.append({"role": "assistant", "content": text, "metadata": {"title": title}})
         return history
+    history[-1] = {"role": "assistant", "content": text, "metadata": {"title": title}}
     return history
+def user_submit(user_message, chat_history):
+    # Gradio handler for Send / Enter: returns ("", history) so the textbox is
+    # cleared. A blank message leaves the history unchanged; otherwise the
+    # stripped text is appended as a user message.
+    chat_history = chat_history or []
     user_message = (user_message or "").strip()
     if not user_message:
+        return "", chat_history
+    chat_history.append({"role": "user", "content": user_message})
+    return "", chat_history
+def format_sources(result: Optional[Dict]) -> str:
+    if not result:
+        return "No sources yet."
+    docs = result.get("retrieved_docs", [])
+    if not docs:
+        return "No ECG retrieval used for the last answer."
+    lines = [f"Best score: {result.get('best_score', -1.0):.3f}", ""]
+    for i, d in enumerate(docs, 1):
+        q = d.metadata.get("question", "")
+        a = d.metadata.get("answer", "")
+        sim = d.metadata.get("sim_score", "N/A")
+        preview = a[:220] + ("..." if len(a) > 220 else "")
+        lines += [
+            f"Evidence {i}",
+            f"- Question: {q}",
+            f"- Similarity: {sim}",
+            f"- Preview: {preview}",
+            "",
+        ]
+    return "\n".join(lines).strip()
+def clear_chat():
+    st = initialize_session()
+    return [], st, "", "No progress yet.", "No sources yet."
+def rebuild_store(session_state, chat_history):
+    global vectorstore
+    if not cfg.allow_rebuild_vectorstore:
+        chat_history = chat_history or []
+        chat_history.append({"role": "assistant", "content": "Vector store rebuild is disabled.", "metadata": {"title": "Restricted"}})
+        return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
+    build_vectorstore()
+    vectorstore = load_vectorstore()
+    chat_history = chat_history or []
+    chat_history.append({"role": "assistant", "content": "✅ Vector store rebuilt.", "metadata": {"title": "Done"}})
+    return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
+# ============================================================
+# STREAMING RESPONSE
+# ============================================================
def bot_respond_stream(chat_history, session_state):
    """Stream the assistant's reply to the most recent chat message.

    Generator event handler. Every ``yield`` is the 5-tuple
    ``(chat_history, session_state, status_html, progress_text, sources_text)``.

    Flow:
      1. With an empty history, emit one "nothing yet" update and stop.
      2. Append a "Thinking..." assistant placeholder and show a few staged
         status messages. They are emitted *before* ``run_agentic_turn`` is
         called, so they are presentational and do not track real progress.
      3. Run the turn and replace the placeholder with the answer. When
         ``cfg.enable_typewriter_stream`` is set, each value produced by
         ``stream_text(answer, step=140)`` replaces the message in turn
         before the full answer is written.

    If running the turn (or unpacking its result) raises, the traceback is
    logged and the placeholder is replaced with an error message, so the
    chat is not left showing "Thinking..." indefinitely.

    Args:
        chat_history: List of message dicts; the last entry's ``"content"``
            is taken as the user message.
        session_state: Current session state, or ``None`` to start a fresh
            one via ``initialize_session()``.

    Yields:
        Tuples of (chat history, session state, status HTML, progress text,
        sources text).
    """
    if session_state is None:
        session_state = initialize_session()
    if not chat_history:
        yield chat_history, session_state, "", "No progress yet.", "No sources yet."
        return

    user_message = str(chat_history[-1]["content"]).strip()
    chat_history = add_assistant_placeholder(chat_history, "Thinking...")

    # (status banner, progress text, pause in seconds) shown before intent detection.
    intro_stages = (
        ("Understanding your message", "Starting...", 0.4),
        ("Detecting whether this is normal chat or ECG reasoning", "Detecting intent...", 0.4),
    )
    for banner, progress, pause in intro_stages:
        yield chat_history, session_state, thinking_html(banner), progress, ""
        time.sleep(pause)

    detected = classify_intent(user_message)
    if detected == "NORMAL_CHAT":
        mode_stages = (
            ("Normal chatbot mode active", "Running normal chat...", 0.4),
        )
    else:
        mode_stages = (
            ("ECG mode detected: retrieving evidence", "Retrieving ECG evidence...", 0.45),
            ("Running local ECG adapter reasoning", "Running local reasoning...", 0.45),
            (
                "Generating multiple summaries and composing final answer",
                "Generating final answer...",
                0.45,
            ),
        )
    for banner, progress, pause in mode_stages:
        yield chat_history, session_state, thinking_html(banner), progress, ""
        time.sleep(pause)

    try:
        out = run_agentic_turn(user_message, session_state)
        result = out["result"]
        updated_session = out["session_state"]
        # `or` also covers a present-but-empty/None final_answer, which the
        # plain .get() default would let through.
        answer = result.get("final_answer") or "I could not generate an answer."
        sources = format_sources(result)
        prog = result.get("progress_text", "No progress yet.")
    except Exception:
        # Top-level UI boundary: keep the traceback in the logs, but make sure
        # the placeholder and status banner are cleared for the user.
        logging.exception("bot_respond_stream: generating the answer failed")
        chat_history = update_last_assistant_message(
            chat_history,
            "Sorry, something went wrong while generating the answer. Please try again.",
            title="Error",
        )
        yield chat_history, session_state, "", "Turn failed; see server logs.", "No sources yet."
        return

    if cfg.enable_typewriter_stream:
        for partial in stream_text(answer, step=140):
            chat_history = update_last_assistant_message(chat_history, partial, title="Answer")
            yield chat_history, updated_session, "", prog, sources

    # Always finish with the complete answer, whether or not it was streamed.
    chat_history = update_last_assistant_message(chat_history, answer, title="Answer")
    yield chat_history, updated_session, "", prog, sources
+# ============================================================
 # APP
+# ============================================================
+with gr.Blocks(title="Agentic ECG Chatbot", css=CUSTOM_CSS) as demo:
     # Layout, top to bottom: header HTML, session state, chat window, message
     # box, status area, Submit/Clear buttons, and two collapsed accordions
     # (progress log and retrieved sources).
+    gr.HTML(header_html())
     # State component seeded with the value initialize_session() returns when
     # this block is built.
     session_state = gr.State(initialize_session())
     # type="messages": the history is handled as role/content dicts, which is
     # the shape bot_respond_stream reads via chat_history[-1]["content"].
+    chatbot = gr.Chatbot(
+        label="Chat",
+        elem_id="chatbot",
+        type="messages",
+        show_copy_button=True,
+        bubble_full_width=False,
+    )
+    user_box = gr.Textbox(
+        label="Message",
+        placeholder="Ask anything. ECG / cardiology questions are detected automatically.",
+        lines=2,
+        autofocus=True,
+    )
     # Receives the third value yielded by bot_respond_stream (the status HTML).
+    status_html = gr.HTML("")
+    with gr.Row():
+        send_btn = gr.Button("Submit", variant="primary")
+        clear_btn = gr.Button("Clear")
+    with gr.Accordion("Progress Log", open=False):
+        progress_panel = gr.Textbox(value="No progress yet.", lines=16, interactive=False)
+    with gr.Accordion("Retrieved ECG Sources", open=False):
+        sources_panel = gr.Textbox(value="No sources yet.", lines=16, interactive=False)
     # Textbox submit: user_submit runs first, then bot_respond_stream streams
     # its updates into the five output components.
     # NOTE(review): no `inputs=` is visible on this wiring, unlike the
     # send_btn.click wiring below — confirm whether that is a diff-view
     # artifact or user_submit really receives no arguments here.
     submit_event = user_box.submit(
         fn=user_submit,
         outputs=[user_box, chatbot],
         queue=True,
     )
     submit_event.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
+        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=True,
     )
     # Submit button: same two-step chain as the textbox submit above.
+    send_event = send_btn.click(
         fn=user_submit,
         inputs=[user_box, chatbot],
         outputs=[user_box, chatbot],
         queue=True,
     )
+    send_event.then(
         fn=bot_respond_stream,
         inputs=[chatbot, session_state],
+        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=True,
     )
     # Clear button: clear_chat takes no inputs and writes to all five outputs;
     # it is not queued.
     clear_btn.click(
         fn=clear_chat,
         inputs=[],
+        outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
         queue=False,
     )
 # Enable the event queue with a default concurrency limit of 1.
 demo.queue(default_concurrency_limit=1)
         debug=cfg.launch_debug,
         server_name=cfg.server_name,
         server_port=cfg.server_port,
+    )