resberry committed on
Commit
41aa811
·
verified ·
1 Parent(s): eb76838

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1136 -578
app.py CHANGED
@@ -1,12 +1,9 @@
1
  import os
2
  import re
3
  import time
4
- import json
5
- import queue
6
- import logging
7
- import threading
8
  import traceback
9
- from typing import List, Dict, TypedDict, Optional, Tuple
 
10
  from dataclasses import dataclass, field
11
 
12
  import torch
@@ -20,44 +17,47 @@ from langchain_core.documents import Document
20
  from langchain_huggingface import HuggingFaceEmbeddings
21
  from langchain_community.vectorstores import FAISS
22
  from langchain_openai import ChatOpenAI
23
-
24
 
25
  # ============================================================
26
- # AGENTIC ECG CHATBOT
27
- # - Starts as normal chatbot
28
- # - Detects ECG / cardiology intent automatically
29
- # - Retrieves from CSV RAG store only for ECG questions
30
- # - Runs local ECG adapter reasoning
31
- # - Runs remote evidence summarizer
32
- # - Runs remote clinical-composer agent
33
- # - Merges both into a final long answer
34
- # - Simple UI with Send / Clear
35
- # - Visible thinking status + progress logs
36
  # ============================================================
37
 
38
- raw_omp = str(os.getenv("OMP_NUM_THREADS", "1")).strip()
39
- os.environ["OMP_NUM_THREADS"] = raw_omp if re.fullmatch(r"\d+", raw_omp) else "1"
40
-
41
-
42
- # ============================================================
43
  # LOGGING
44
- # ============================================================
45
  logging.basicConfig(
46
  level=logging.INFO,
47
- format="%(asctime)s | %(levelname)s | %(message)s"
48
  )
49
- logger = logging.getLogger("agentic_ecg_chatbot")
50
 
51
 
52
- # ============================================================
53
  # CONFIG
54
- # ============================================================
55
  @dataclass
56
  class Config:
57
- base_model_path: str = os.getenv("BASE_MODEL_PATH", "meta-llama/Llama-3.1-8B-Instruct")
58
- adapter_dir: str = os.getenv("ADAPTER_DIR", "adapter_refined_v10")
59
- data_csv: str = os.getenv("DATA_CSV", "RAGmaterials/ECG_RAG_only_clean.csv")
60
- rag_dir: str = os.getenv("RAG_DIR", "RAGmaterials")
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  vectorstore_dir: str = field(init=False)
62
 
63
  hf_token: str = os.getenv("HF_TOKEN", "")
@@ -65,41 +65,51 @@ class Config:
65
  deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
66
  deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
67
 
68
- embed_model_name: str = os.getenv("EMBED_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2")
 
 
 
 
 
 
69
 
70
- similarity_k: int = int(os.getenv("SIMILARITY_K", "10"))
71
  top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
72
- max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5500"))
73
 
74
  max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
75
- max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "220"))
76
  max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
77
 
78
- min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.06"))
79
- min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.18"))
80
-
81
- deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.15"))
82
- deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "900"))
83
 
84
- use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
85
  enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
 
86
  enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
87
- enable_warmup: bool = os.getenv("ENABLE_WARMUP", "true").lower() == "true"
88
  allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
89
 
 
 
90
  launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
91
  server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
92
  server_port: int = int(os.getenv("SERVER_PORT", "7860"))
93
 
 
 
 
 
 
94
  def __post_init__(self):
95
  self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
96
  os.makedirs(self.rag_dir, exist_ok=True)
97
 
98
  if not self.deepseek_api_key:
99
- raise ValueError("Missing DEEPSEEK_API_KEY in environment / Space secrets.")
100
-
101
- if not self.hf_token:
102
- raise ValueError("Missing HF_TOKEN in environment / Space secrets.")
103
 
104
  for path, name in [
105
  (self.adapter_dir, "Adapter directory"),
@@ -113,38 +123,20 @@ cfg = Config()
113
  logger.info("Configuration loaded.")
114
 
115
 
116
- # ============================================================
117
  # PROMPTS
118
- # ============================================================
119
- INTENT_CLASSIFIER_SYSTEM = """
120
- You classify user messages.
121
-
122
- Return only one label:
123
- - ECG_RAG
124
- - NORMAL_CHAT
125
 
126
- Choose ECG_RAG if the message is about ECG, EKG, cardiology, arrhythmia, heart rhythm, cardiac conduction,
127
- ST changes, QRS, PR, QT, tachycardia, bradycardia, atrial fibrillation, flutter, bundle branch block,
128
- heart block, hyperkalemia ECG changes, or similar cardiology interpretation.
129
- Otherwise return NORMAL_CHAT.
130
- """.strip()
131
 
132
- QUERY_EXPANSION_SYSTEM = """
133
- You expand ECG and cardiology retrieval queries.
134
  Rules:
135
- 1. Preserve the exact user intent.
136
- 2. Add close cardiology / ECG synonyms and alternate wording.
137
- 3. Do not answer the question.
138
- 4. Output only the expanded retrieval query.
139
- """.strip()
140
-
141
- LOCAL_REASONING_SYSTEM = """
142
- You are a strict ECG and cardiology reasoning assistant.
143
- You are not the final answer generator.
144
- Use only the evidence provided.
145
- Do not invent facts.
146
-
147
- Output exactly in this format:
148
 
149
  KEY_FINDINGS:
150
  - ...
@@ -161,102 +153,138 @@ SUPPORTED_POINTS:
161
  LIMITS:
162
  - ...
163
 
164
- If evidence is insufficient, output exactly:
165
  INSUFFICIENT_EVIDENCE
166
  """.strip()
167
 
168
- RAG_SUMMARY_SYSTEM = """
169
- You are a clinical evidence summarizer.
170
- Write a well-structured answer grounded only in the provided evidence and reasoning draft.
171
- Do not use outside knowledge.
172
- Be accurate, conservative, and clinically clear.
173
-
174
- Output format:
175
- ### Summary
176
- 4 to 7 full sentences.
177
-
178
- ### Key Evidence Points
179
- 4 to 6 bullet points.
180
-
181
- ### Clinical Interpretation
182
- 2 to 4 bullet points if supported.
183
-
184
- ### Evidence Limits
185
- State what is not established.
186
 
187
- If the evidence is too weak, output exactly:
188
- INSUFFICIENT_EVIDENCE
 
 
 
 
189
  """.strip()
190
 
191
- CLINICAL_COMPOSER_SYSTEM = """
192
- You are a second medical composition agent.
193
- Your job is to produce a longer, polished explanation from the same evidence and the same user question.
194
- You must stay faithful to the evidence.
195
- Do not add unsupported facts.
196
- Do not mention tools, prompts, or pipelines.
197
-
198
- Output format:
199
- ### Direct Answer
200
- A direct answer in 2 to 3 sentences.
201
 
202
- ### Expanded Explanation
203
- A longer explanation in 5 to 8 sentences.
 
204
 
205
- ### Important Notes
206
- 3 to 5 bullet points.
207
 
208
- ### Remaining Uncertainty
209
- State what the evidence does not prove.
 
 
 
210
 
211
- If the evidence is too weak, output exactly:
 
 
 
 
 
 
212
  INSUFFICIENT_EVIDENCE
213
- """.strip()
214
-
215
- FINAL_MERGER_SYSTEM = """
216
- You are the final answer agent.
217
- You will receive:
218
- 1. the user's question
219
- 2. retrieved evidence
220
- 3. a local ECG adapter reasoning draft
221
- 4. summary agent output
222
- 5. clinical composer output
223
-
224
- Write one final long-form answer.
225
- Rules:
226
- - Use only supported information.
227
- - Merge overlapping ideas cleanly.
228
- - Do not repeat the same point too many times.
229
- - Make the answer helpful, detailed, and readable.
230
- - Do not mention internal agents or processing steps.
231
-
232
- Output format:
233
- ### Final Answer
234
- A detailed answer in 6 to 10 sentences.
235
 
236
- ### Key Points
237
- 4 to 6 bullets.
 
 
 
 
 
 
 
238
 
239
- ### Clinical Perspective
240
- 2 to 4 bullets if supported.
241
 
242
- ### Limits
243
- A short honest limitations section.
 
 
 
 
 
244
 
245
- If evidence is weak, output exactly:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  INSUFFICIENT_EVIDENCE
247
  """.strip()
248
 
249
- NORMAL_CHAT_SYSTEM = """
250
- You are a helpful, friendly chatbot.
251
- Be conversational, clear, and useful.
252
- Answer normally.
253
- Do not mention hidden tools or internal systems.
 
 
 
 
 
 
 
 
254
  """.strip()
255
 
256
 
257
- # ============================================================
258
  # HELPERS
259
- # ============================================================
260
  def clean_text(x: str) -> str:
261
  x = str(x).replace("\x00", " ").strip()
262
  x = re.sub(r"\s+", " ", x)
@@ -265,6 +293,20 @@ def clean_text(x: str) -> str:
265
 
266
def strip_bad_sections(txt: str) -> str:
    """Return ``txt`` as a trimmed string with URLs removed.

    The input is coerced with ``str()`` and stripped, every run of
    non-whitespace characters starting with ``http://``, ``https://`` or
    ``www.`` is deleted, and the result is stripped once more.
    """
    without_urls = re.sub(r"https?://\S+|www\.\S+", "", str(txt).strip())
    return without_urls.strip()
270
 
@@ -272,16 +314,22 @@ def strip_bad_sections(txt: str) -> str:
272
def infer_tags(question: str, answer: str) -> List[str]:
    """Infer coarse topic tags from a question/answer pair.

    The question and answer are joined and lower-cased, then checked against
    a fixed keyword table. A tag is emitted (in table order) as soon as any of
    its keywords is found.

    Keywords of three characters or fewer ("pr", "qt", "ecg", ...) are
    abbreviations and must appear as whole words; matching them as bare
    substrings made them fire inside unrelated words such as "approach" or
    "pressure". Longer keywords keep substring matching so that stems such as
    "treat" still match "treatment".

    Args:
        question: Source question text.
        answer: Source answer text.

    Returns:
        The list of matching tag names, possibly empty.
    """
    text = f"{question} {answer}".lower()

    keyword_map = {
        "ecg": ["ecg", "ekg", "qrs", "pr", "qt", "st elevation", "t wave", "arrhythmia", "tachycardia", "bradycardia"],
        "diagnosis": ["diagnosis", "diagnose", "criteria"],
        "treatment": ["treat", "therapy", "management", "drug"],
        "symptoms": ["symptom", "sign", "presentation"],
        "etiology": ["cause", "caused by", "associated with", "risk factor"],
    }

    def _mentions(keyword: str) -> bool:
        # Short abbreviations need word boundaries to avoid accidental hits.
        if len(keyword) <= 3:
            return re.search(rf"\b{re.escape(keyword)}\b", text) is not None
        return keyword in text

    return [
        tag
        for tag, words in keyword_map.items()
        if any(_mentions(w) for w in words)
    ]
286
 
287
 
@@ -305,95 +353,110 @@ def lexical_overlap(query: str, text: str) -> float:
305
  return len(q_words & t_words) / max(1, len(q_words))
306
 
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
    """Render the tail of a chat history as ``ROLE: content`` lines.

    Args:
        chat_history: Messages as dicts carrying ``role`` and ``content`` keys.
        max_turns: Number of trailing messages to keep. A falsy value falls
            back to ``cfg.max_chat_history_turns``.

    Returns:
        One line per kept message joined by newlines, or ``"[EMPTY]"`` when no
        message is selected.
    """
    limit = max_turns if max_turns else cfg.max_chat_history_turns
    recent = chat_history[-limit:]
    if recent:
        rendered = [f"{msg['role'].upper()}: {msg['content']}" for msg in recent]
        return "\n".join(rendered).strip()
    return "[EMPTY]"
314
 
315
 
316
  def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
317
- max_chars = max_chars or cfg.max_context_chars
 
 
318
  blocks = []
319
  total = 0
 
320
  for i, d in enumerate(docs, 1):
321
  q = d.metadata.get("question", "")
322
  a = d.metadata.get("answer", "")
323
  tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
324
- sim = d.metadata.get("sim_score", "N/A")
 
325
  block = f"""
326
  ==============================
327
  EVIDENCE_ID: {i}
328
  SOURCE_ID: {d.metadata.get('id')}
329
  SOURCE_QUESTION: {q}
330
  SOURCE_TAGS: {tags}
331
- SIMILARITY: {sim}
332
  EVIDENCE_TEXT:
333
  {a}
334
  ==============================
335
  """.strip()
 
336
  if total + len(block) > max_chars:
337
  break
 
338
  blocks.append(block)
339
  total += len(block) + 2
340
- return "\n\n".join(blocks).strip()
341
 
342
-
343
def stream_text(text: str, step: int = 120):
    """Yield progressively longer prefixes of ``text`` for typewriter output.

    Each yielded value extends the previous one by up to ``step`` characters;
    the last value is the whole text. Nothing is yielded for empty text.
    """
    for start in range(0, len(text), step):
        yield text[: start + step]
348
-
349
-
350
- # ============================================================
351
- # PROGRESS / LOGGING
352
- # ============================================================
353
def new_progress_state() -> Dict:
    """Create a fresh progress record holding an empty ``lines`` list."""
    return dict(lines=[])
355
 
356
 
357
def add_progress(progress_state: Dict, msg: str):
    """Log ``msg`` and record it, time-stamped, in ``progress_state``.

    The entry is appended to ``progress_state["lines"]`` as
    ``[HH:MM:SS] msg`` and the list is then trimmed to its last 80 entries.
    """
    stamp = time.strftime('%H:%M:%S')
    entry = f"[{stamp}] {msg}"
    logger.info(msg)
    history = progress_state["lines"]
    history.append(entry)
    # Rebind to a bounded copy so the log cannot grow without limit.
    progress_state["lines"] = history[-80:]
362
 
 
 
 
 
 
 
363
 
364
def progress_text(progress_state: Dict) -> str:
    """Join the recorded progress lines, or return a placeholder when none exist."""
    entries = progress_state.get("lines", [])
    if not entries:
        return "No progress yet."
    return "\n".join(entries)
367
 
368
 
369
- # ============================================================
370
- # ECG QUERY DETECTION
371
- # ============================================================
372
# Word-bounded patterns; a hit on any one of them marks a message as ECG-related.
ECG_REGEXES = [
    r"\becg\b", r"\bekg\b", r"\bcardiology\b", r"\barrhythmia\b", r"\bheart rhythm\b",
    r"\batrial fibrillation\b", r"\bafib\b", r"\bflutter\b", r"\bqrs\b", r"\bpr interval\b",
    r"\bqt\b", r"\bst elevation\b", r"\bst depression\b", r"\bt wave\b", r"\bbradycardia\b",
    r"\btachycardia\b", r"\bheart block\b", r"\bbundle branch block\b", r"\bhyperkalemia\b",
]


def detect_ecg_by_rules(text: str) -> bool:
    """Return True when ``text`` matches any pattern in ``ECG_REGEXES``.

    The text is coerced to a string (``None`` becomes empty), lower-cased and
    stripped before matching.
    """
    normalized = str(text or "").lower().strip()
    for pattern in ECG_REGEXES:
        if re.search(pattern, normalized):
            return True
    return False
 
 
383
 
384
 
385
- # ============================================================
386
  # EMBEDDINGS + VECTORSTORE
387
- # ============================================================
388
  logger.info("Loading embeddings...")
389
- embeddings = HuggingFaceEmbeddings(
390
- model_name=cfg.embed_model_name,
391
- model_kwargs={
392
- "device": "cuda" if torch.cuda.is_available() else "cpu",
393
- "token": cfg.hf_token if cfg.hf_token else None,
394
- },
395
- encode_kwargs={"normalize_embeddings": True},
396
- )
397
 
398
 
399
  def build_vectorstore():
@@ -421,7 +484,7 @@ def build_vectorstore():
421
  "question": q,
422
  "answer": a,
423
  "tags": infer_tags(q, a),
424
- },
425
  )
426
  )
427
 
@@ -446,15 +509,16 @@ vectorstore = load_vectorstore()
446
  logger.info("Vectorstore ready.")
447
 
448
 
449
- # ============================================================
450
- # MODEL LOADING
451
- # ============================================================
452
  logger.info("Loading tokenizer...")
453
  tokenizer = AutoTokenizer.from_pretrained(
454
  cfg.base_model_path,
455
  use_fast=True,
456
- token=cfg.hf_token if cfg.hf_token else None,
457
  )
 
458
  if tokenizer.pad_token is None:
459
  tokenizer.pad_token = tokenizer.eos_token
460
 
@@ -495,19 +559,10 @@ if base_model is None:
495
 
496
  base_model.eval()
497
 
498
- logger.info("Loading ECG adapter...")
499
  reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
500
  reason_model.eval()
501
 
502
- logger.info("Loading remote LLM client...")
503
- remote_llm = ChatOpenAI(
504
- model=cfg.deepseek_model,
505
- api_key=cfg.deepseek_api_key,
506
- base_url=cfg.deepseek_base_url,
507
- temperature=cfg.deepseek_temperature,
508
- max_tokens=cfg.deepseek_max_tokens,
509
- )
510
-
511
 
512
  def get_primary_model_device(model) -> torch.device:
513
  try:
@@ -516,50 +571,15 @@ def get_primary_model_device(model) -> torch.device:
516
  return torch.device("cuda" if torch.cuda.is_available() else "cpu")
517
 
518
 
519
- # ============================================================
520
- # LLM CALLS
521
- # ============================================================
522
def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
    """Send one system+user exchange to the remote LLM and return its text.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        fallback: Value returned when the call fails or yields only
            whitespace.

    Returns:
        The reply passed through ``strip_bad_sections``, or ``fallback``.
        Exceptions are never propagated to the caller.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        resp = remote_llm.invoke(messages)
        text = resp.content if hasattr(resp, "content") else str(resp)
        text = strip_bad_sections(text)
        return text if text.strip() else fallback
    except Exception as e:
        # logger.exception keeps the stack trace inside the logging pipeline
        # instead of printing it separately to stderr.
        logger.exception("Remote LLM error: %s", e)
        return fallback
535
-
536
-
537
def classify_intent(user_query: str) -> str:
    """Label a user message as ``"ECG_RAG"`` or ``"NORMAL_CHAT"``.

    The keyword rules are tried first; only when they do not match is the
    remote classifier asked. Any classifier reply that does not contain
    ``ECG_RAG`` is treated as normal chat.
    """
    ecg_label, chat_label = "ECG_RAG", "NORMAL_CHAT"

    if detect_ecg_by_rules(user_query):
        return ecg_label

    verdict = llm_text(
        INTENT_CLASSIFIER_SYSTEM,
        f"USER_MESSAGE:\n{user_query}",
        fallback=chat_label,
    )
    if ecg_label in verdict.strip().upper():
        return ecg_label
    return chat_label
547
-
548
-
549
def run_query_expansion(user_query: str) -> str:
    """Return an LLM-expanded retrieval query, or the original query.

    The original query is returned unchanged when expansion is disabled in
    ``cfg`` or when the LLM yields an empty result.
    """
    if cfg.enable_query_expansion:
        prompt = f"USER_QUERY:\n{user_query}\n\nExpand this for ECG/cardiology retrieval."
        expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
        if expanded:
            return expanded.strip()
    return user_query
555
-
556
-
557
  @torch.inference_mode()
558
  def run_local_reasoner(user_query: str, context: str) -> str:
559
  try:
560
  messages = [
561
  {"role": "system", "content": LOCAL_REASONING_SYSTEM},
562
- {"role": "user", "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context or '[EMPTY]'}"},
 
 
 
563
  ]
564
 
565
  prompt = tokenizer.apply_chat_template(
@@ -591,81 +611,117 @@ def run_local_reasoner(user_query: str, context: str) -> str:
591
 
592
  gen_ids = out[0, inputs["input_ids"].shape[1]:]
593
  text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
594
- return strip_bad_sections(text) or "INSUFFICIENT_EVIDENCE"
 
 
 
595
  except Exception as e:
596
  logger.error(f"Local reasoner error: {e}")
597
  traceback.print_exc()
598
  return "INSUFFICIENT_EVIDENCE"
599
 
600
 
601
- def run_rag_summary(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
602
- prompt = f"""
603
- CHAT_HISTORY:
604
- {history_to_text(chat_history)}
 
 
 
 
 
 
605
 
606
- USER_QUESTION:
607
- {user_query}
608
 
609
- RETRIEVED_EVIDENCE:
610
- {context if context.strip() else '[EMPTY]'}
611
 
612
- LOCAL_REASONING_DRAFT:
613
- {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
614
- """.strip()
615
- return llm_text(RAG_SUMMARY_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
 
 
 
 
 
 
 
 
 
616
 
617
 
618
- def run_clinical_composer(user_query: str, context: str, reasoning_draft: str, chat_history: List[Dict[str, str]]) -> str:
619
- prompt = f"""
620
- CHAT_HISTORY:
621
- {history_to_text(chat_history)}
622
 
623
- USER_QUESTION:
624
- {user_query}
 
625
 
626
- RETRIEVED_EVIDENCE:
627
- {context if context.strip() else '[EMPTY]'}
 
628
 
629
- LOCAL_REASONING_DRAFT:
630
- {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
631
  """.strip()
632
- return llm_text(CLINICAL_COMPOSER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
 
 
 
 
 
 
 
633
 
634
 
635
- def run_final_merger(user_query: str, context: str, reasoning_draft: str, summary_a: str, summary_b: str) -> str:
 
 
 
 
 
636
  prompt = f"""
 
 
 
637
  USER_QUESTION:
638
  {user_query}
639
 
640
  RETRIEVED_EVIDENCE:
641
  {context if context.strip() else '[EMPTY]'}
642
 
643
- LOCAL_ECG_REASONING:
644
  {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
645
 
646
- SUMMARY_AGENT_OUTPUT:
647
- {summary_a if summary_a.strip() else '[EMPTY]'}
648
-
649
- CLINICAL_COMPOSER_OUTPUT:
650
- {summary_b if summary_b.strip() else '[EMPTY]'}
651
  """.strip()
652
- return llm_text(FINAL_MERGER_SYSTEM, prompt, fallback="INSUFFICIENT_EVIDENCE")
653
 
 
 
 
 
 
 
 
 
 
 
654
 
655
- def run_normal_chat(user_query: str, chat_history: List[Dict[str, str]]) -> str:
656
  prompt = f"""
657
- CHAT_HISTORY:
658
- {history_to_text(chat_history)}
659
 
660
- USER_MESSAGE:
661
- {user_query}
662
  """.strip()
663
- return llm_text(NORMAL_CHAT_SYSTEM, prompt, fallback="Sorry, I could not generate a response.")
664
 
 
665
 
666
- # ============================================================
 
667
  # WARMUP
668
- # ============================================================
669
  def warmup_models():
670
  logger.info("Warming up local reasoner...")
671
  try:
@@ -676,7 +732,6 @@ def warmup_models():
676
  EVIDENCE_ID: 1
677
  SOURCE_QUESTION: What are ECG findings in hyperkalemia?
678
  SOURCE_TAGS: ecg
679
- SIMILARITY: 0.9
680
  EVIDENCE_TEXT:
681
  Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
682
  ==============================
@@ -687,58 +742,38 @@ Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe
687
  logger.warning(f"Warmup failed: {e}")
688
 
689
 
690
- if cfg.enable_warmup:
691
- warmup_models()
692
 
693
 
694
- # ============================================================
695
  # STATE
696
- # ============================================================
697
class AgentState(TypedDict, total=False):
    """Per-turn pipeline state; every key is optional (``total=False``)."""

    # Input
    user_query: str
    chat_history: List[Dict[str, str]]

    # Routing
    detected_mode: str
    expanded_query: str

    # Retrieval
    retrieved_docs: List[Document]
    best_score: float
    context: str

    # Generation stages
    local_reasoning: str
    summary_agent: str
    composer_agent: str
    final_answer: str
 
712
 
713
 
714
- # ============================================================
715
  # RETRIEVAL
716
- # ============================================================
717
def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
    """Re-order retrieved documents by a blended lexical/vector score.

    The score of a document is the sum of:

    * the fraction of query tokens found in its question, answer or tags,
    * ``0.20`` if a query token occurs among the question's tokens,
    * ``0.10`` if a query token occurs among the tag tokens,
    * ``0.35 *`` the ``sim_score`` stored in its metadata (0.0 if absent).

    Both boosts compare whole tokens. Substring containment would let short
    query words such as "a" or "is" match inside almost any text, making the
    boosts fire for nearly every document.

    Args:
        query: The user's query.
        docs: Candidate documents; ``metadata`` supplies ``question``,
            ``answer``, ``tags`` and ``sim_score``.
        top_n: Number of documents to keep. A falsy value falls back to
            ``cfg.top_k_final``.

    Returns:
        The ``top_n`` best documents, highest score first. Ties keep their
        input order.
    """
    top_n = top_n or cfg.top_k_final
    q_words = set(re.findall(r"\w+", query.lower()))
    scored = []

    for d in docs:
        question = d.metadata.get("question", "")
        answer = d.metadata.get("answer", "")
        tags = " ".join(d.metadata.get("tags", []))

        question_words = set(re.findall(r"\w+", question.lower()))
        answer_words = set(re.findall(r"\w+", answer.lower()))
        tag_words = set(re.findall(r"\w+", tags.lower()))

        matched = q_words & (question_words | answer_words | tag_words)
        overlap = len(matched) / max(1, len(q_words))
        question_boost = 0.20 if q_words & question_words else 0.0
        tag_boost = 0.10 if q_words & tag_words else 0.0
        sim_score = float(d.metadata.get("sim_score", 0.0))

        final_score = overlap + question_boost + tag_boost + (0.35 * sim_score)
        scored.append((d, final_score))

    # list.sort is stable, so equally scored documents keep retrieval order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [d for d, _ in scored[:top_n]]
737
-
738
-
739
- def retrieve_docs_once(query_for_search: str, original_query: str) -> Tuple[List[Document], float]:
740
  try:
741
- scored = vectorstore.similarity_search_with_score(query_for_search, k=cfg.similarity_k)
 
 
 
742
  except Exception as e:
743
  logger.error(f"Retriever error: {e}")
744
  traceback.print_exc()
@@ -748,216 +783,493 @@ def retrieve_docs_once(query_for_search: str, original_query: str) -> Tuple[List
748
  return [], -1.0
749
 
750
  filtered_docs = []
 
 
751
  for doc, raw_score in scored:
752
  sim = score_to_similarity(raw_score)
 
 
753
  q = doc.metadata.get("question", "")
754
  a = doc.metadata.get("answer", "")
755
  ov = lexical_overlap(original_query, f"{q} {a}")
756
 
757
- if sim >= 0.45 or (ov >= cfg.min_lexical_overlap and sim >= cfg.min_faiss_similarity):
758
  new_doc = Document(page_content=doc.page_content, metadata=dict(doc.metadata))
759
  new_doc.metadata["sim_score"] = sim
760
  new_doc.metadata["lexical_overlap"] = ov
761
  filtered_docs.append(new_doc)
762
 
763
  reranked = rerank_docs(original_query, filtered_docs, top_n=cfg.top_k_final)
764
- best_score = max((float(d.metadata.get("sim_score", -1.0)) for d in reranked), default=-1.0)
765
  return reranked, best_score
766
 
767
 
768
def retrieve_docs(query: str) -> Tuple[List[Document], float, str]:
    """Retrieve evidence for ``query``, optionally widened by query expansion.

    The raw query is always searched. When expansion is enabled in ``cfg``,
    the expanded query is searched as well, both result lists are merged
    without duplicates, and the merged list is re-ranked against the original
    query.

    Args:
        query: The user's question.

    Returns:
        ``(documents, best_score, query_used)``, where ``query_used`` is the
        expanded query, or ``query`` itself when expansion is disabled.
    """
    docs_a, score_a = retrieve_docs_once(query, query)

    if not cfg.enable_query_expansion:
        return docs_a, score_a, query

    expanded = run_query_expansion(query)
    if expanded == query:
        # Expansion changed nothing, so a second search would repeat the first.
        docs_b, score_b = [], score_a
    else:
        docs_b, score_b = retrieve_docs_once(expanded, query)

    merged = []
    seen_keys = set()
    for d in docs_a + docs_b:
        doc_id = d.metadata.get("id")
        # Documents without an id would all share the key None and collapse
        # into one entry, so they are keyed by their content instead.
        key = ("id", doc_id) if doc_id is not None else ("content", d.page_content)
        if key not in seen_keys:
            seen_keys.add(key)
            merged.append(d)

    merged = rerank_docs(query, merged, top_n=cfg.top_k_final)
    best_score = max(score_a, score_b)
    return merged, best_score, expanded
787
 
 
 
 
 
 
788
 
789
- # ============================================================
790
- # CORE AGENTIC PIPELINE
791
- # ============================================================
792
def initialize_session() -> Dict:
    """Build the initial session: empty history, no last result, fresh progress."""
    session: Dict = {}
    session["chat_history"] = []
    session["last_result"] = None
    session["progress"] = new_progress_state()
    return session
798
 
 
 
 
 
799
 
800
def _build_turn_result(
    mode: str,
    final_answer: str,
    progress: Dict,
    *,
    retrieved_docs=None,
    best_score: float = -1.0,
    context: str = "",
    local_reasoning: str = "",
    summary_agent: str = "",
    composer_agent: str = "",
) -> Dict:
    """Assemble the per-turn result dict with a snapshot of the progress log."""
    return {
        "mode": mode,
        "final_answer": final_answer,
        "retrieved_docs": [] if retrieved_docs is None else retrieved_docs,
        "best_score": best_score,
        "context": context,
        "local_reasoning": local_reasoning,
        "summary_agent": summary_agent,
        "composer_agent": composer_agent,
        "progress_text": progress_text(progress),
    }


def _run_ecg_turn(user_query: str, chat_history: List[Dict[str, str]], progress: Dict) -> Dict:
    """Run retrieval, local reasoning, both remote agents and the final merge."""
    add_progress(progress, "Running ECG retrieval")
    docs, best_score, expanded_query = retrieve_docs(user_query)

    add_progress(progress, f"Retrieved {len(docs)} document(s)")
    add_progress(progress, f"Best score: {best_score:.3f}")
    add_progress(progress, f"Expanded query: {expanded_query}")

    context = build_context_string(docs)

    if not context.strip():
        add_progress(progress, "No strong ECG evidence found")
        answer = "I could not find sufficiently relevant ECG evidence in the CSV knowledge base for this question."
        return _build_turn_result(
            "ecg_rag",
            answer,
            progress,
            retrieved_docs=docs,
            best_score=best_score,
            context=context,
        )

    add_progress(progress, "Running local ECG adapter reasoning")
    local_reasoning = run_local_reasoner(user_query, context)

    add_progress(progress, "Running summary agent")
    summary_agent = run_rag_summary(user_query, context, local_reasoning, chat_history)

    add_progress(progress, "Running clinical composer agent")
    composer_agent = run_clinical_composer(user_query, context, local_reasoning, chat_history)

    add_progress(progress, "Running final merger agent")
    final_answer = run_final_merger(user_query, context, local_reasoning, summary_agent, composer_agent)

    # If the merger declines, fall back to the summary agent's answer.
    if not final_answer.strip() or final_answer.strip() == "INSUFFICIENT_EVIDENCE":
        final_answer = summary_agent if summary_agent.strip() else "INSUFFICIENT_EVIDENCE"

    add_progress(progress, "Final answer ready")
    return _build_turn_result(
        "ecg_rag",
        final_answer,
        progress,
        retrieved_docs=docs,
        best_score=best_score,
        context=context,
        local_reasoning=local_reasoning,
        summary_agent=summary_agent,
        composer_agent=composer_agent,
    )


def run_agentic_turn(user_query: str, session_state: Dict) -> Dict:
    """Process one user message and update the session.

    The message is classified first. Normal chat goes straight to the chat
    model; ECG questions go through retrieval, local reasoning, the two
    remote agents and the final merger.

    Args:
        user_query: The user's message.
        session_state: Session dict as produced by ``initialize_session``;
            ``None`` starts a new session. It is updated in place.

    Returns:
        ``{"result": ..., "session_state": ...}`` where ``result`` holds the
        mode, final answer, retrieval details, intermediate agent outputs and
        the progress log text.
    """
    if session_state is None:
        session_state = initialize_session()

    progress = new_progress_state()
    add_progress(progress, "User message received")

    # Bind the history back into the session so the appends below cannot hit
    # a missing (or None) "chat_history" entry.
    chat_history = session_state.get("chat_history") or []
    session_state["chat_history"] = chat_history

    add_progress(progress, "Detecting query type")
    mode = classify_intent(user_query)
    add_progress(progress, f"Detected mode: {mode}")

    if mode == "NORMAL_CHAT":
        add_progress(progress, "Running normal chat response")
        answer = run_normal_chat(user_query, chat_history)
        result = _build_turn_result("normal_chat", answer, progress)
    else:
        result = _run_ecg_turn(user_query, chat_history, progress)

    chat_history.append({"role": "user", "content": user_query})
    chat_history.append({"role": "assistant", "content": result["final_answer"]})
    # Keep only the 12 most recent messages in the session.
    session_state["chat_history"] = chat_history[-12:]
    session_state["last_result"] = result
    session_state["progress"] = progress

    return {"result": result, "session_state": session_state}
 
 
 
 
 
887
 
 
 
 
 
 
888
 
889
- # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  # UI HELPERS
891
- # ============================================================
892
  CUSTOM_CSS = """
 
 
 
 
 
 
 
 
 
 
 
 
 
893
  html, body, .gradio-container {
894
  margin: 0 !important;
895
  padding: 0 !important;
896
- background: #0b1220;
897
- color: #e5e7eb;
 
 
 
 
898
  }
 
899
  .gradio-container {
900
- max-width: 900px !important;
901
- margin: 0 auto !important;
902
- padding: 16px !important;
903
  }
904
- .simple-card {
905
- border: 1px solid rgba(255,255,255,0.08);
906
- background: #111827;
907
- border-radius: 18px;
 
 
 
 
 
908
  padding: 16px;
909
  margin-bottom: 12px;
 
910
  }
911
- .app-title {
912
- font-size: 1.4rem;
 
913
  font-weight: 800;
914
- color: #f9fafb;
915
  margin-bottom: 6px;
 
916
  }
917
- .app-subtitle {
918
- font-size: 0.95rem;
919
  color: #cbd5e1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  }
 
921
  #chatbot {
922
- min-height: 60vh !important;
 
923
  border-radius: 18px !important;
 
 
 
924
  }
925
- .status-box {
926
- border: 1px solid rgba(255,255,255,0.08);
927
- background: linear-gradient(180deg, #111827 0%, #172033 100%);
928
- border-radius: 16px;
929
  padding: 12px 14px;
930
- color: #f3f4f6;
 
 
 
 
 
931
  }
932
- .thinking-dots {
933
- display: inline-block;
934
- letter-spacing: 4px;
 
 
 
 
 
935
  font-weight: 800;
 
936
  animation: blinkDots 1s steps(1, end) infinite;
 
 
937
  }
 
938
  @keyframes blinkDots {
939
  0% { opacity: 1; }
940
- 50% { opacity: 0.2; }
941
  100% { opacity: 1; }
942
  }
 
943
  textarea, .gr-textbox textarea {
944
- border-radius: 14px !important;
 
 
 
 
 
945
  }
 
946
  button {
947
  border-radius: 14px !important;
948
  min-height: 44px !important;
949
  font-weight: 600 !important;
950
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951
  """
952
 
953
 
954
- def header_html() -> str:
955
  return """
956
- <div class="simple-card">
957
- <div class="app-title">🫀 Agentic ECG Chatbot</div>
958
- <div class="app-subtitle">
959
- Starts as normal chat. If the question is ECG/cardiology-related, it automatically switches into ECG evidence mode,
960
- retrieves from your CSV knowledge base, runs local ECG adapter reasoning, builds two summaries, and merges them into one long final answer.
 
 
 
 
 
 
961
  </div>
962
  </div>
963
  """
@@ -965,160 +1277,398 @@ def header_html() -> str:
965
 
966
  def thinking_html(stage: str) -> str:
967
  return f"""
968
- <div class="status-box">
969
- <b>{stage}</b><br>
970
- Model is thinking <span class="thinking-dots">...</span>
 
 
 
 
 
 
971
  </div>
972
  """
973
 
974
 
975
- def add_assistant_placeholder(history, text="Thinking..."):
 
 
 
 
976
  history = history or []
977
- history.append({"role": "assistant", "content": text, "metadata": {"title": "Thinking"}})
 
 
 
 
978
  return history
979
 
980
 
981
- def update_last_assistant_message(history, text, title="Answer"):
982
  history = history or []
983
  if not history or history[-1]["role"] != "assistant":
984
- history.append({"role": "assistant", "content": text, "metadata": {"title": title}})
 
 
 
985
  return history
986
- history[-1] = {"role": "assistant", "content": text, "metadata": {"title": title}}
 
 
 
987
  return history
988
 
989
 
990
- def user_submit(user_message, chat_history):
991
- chat_history = chat_history or []
992
  user_message = (user_message or "").strip()
 
993
  if not user_message:
994
- return "", chat_history
995
- chat_history.append({"role": "user", "content": user_message})
996
- return "", chat_history
997
 
 
 
998
 
999
- def format_sources(result: Optional[Dict]) -> str:
1000
- if not result:
1001
- return "No sources yet."
1002
- docs = result.get("retrieved_docs", [])
1003
- if not docs:
1004
- return "No ECG retrieval used for the last answer."
1005
- lines = [f"Best score: {result.get('best_score', -1.0):.3f}", ""]
1006
- for i, d in enumerate(docs, 1):
1007
- q = d.metadata.get("question", "")
1008
- a = d.metadata.get("answer", "")
1009
- sim = d.metadata.get("sim_score", "N/A")
1010
- preview = a[:220] + ("..." if len(a) > 220 else "")
1011
- lines += [
1012
- f"Evidence {i}",
1013
- f"- Question: {q}",
1014
- f"- Similarity: {sim}",
1015
- f"- Preview: {preview}",
1016
- "",
1017
- ]
1018
- return "\n".join(lines).strip()
1019
 
 
 
 
 
 
 
1020
 
1021
- def clear_chat():
1022
- st = initialize_session()
1023
- return [], st, "", "No progress yet.", "No sources yet."
 
 
1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1025
 
1026
- def rebuild_store(session_state, chat_history):
1027
- global vectorstore
1028
- if not cfg.allow_rebuild_vectorstore:
1029
- chat_history = chat_history or []
1030
- chat_history.append({"role": "assistant", "content": "Vector store rebuild is disabled.", "metadata": {"title": "Restricted"}})
1031
- return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
1032
 
1033
- build_vectorstore()
1034
- vectorstore = load_vectorstore()
1035
- chat_history = chat_history or []
1036
- chat_history.append({"role": "assistant", "content": "✅ Vector store rebuilt.", "metadata": {"title": "Done"}})
1037
- return chat_history, session_state, "", progress_text(session_state.get("progress", new_progress_state())), format_sources(session_state.get("last_result"))
1038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1039
 
1040
- # ============================================================
1041
- # STREAMING RESPONSE
1042
- # ============================================================
1043
- def bot_respond_stream(chat_history, session_state):
1044
  if session_state is None:
1045
  session_state = initialize_session()
1046
 
1047
- if not chat_history:
1048
- yield chat_history, session_state, "", "No progress yet.", "No sources yet."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
  return
1050
 
1051
- user_message = str(chat_history[-1]["content"]).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1052
 
1053
- chat_history = add_assistant_placeholder(chat_history, "Thinking...")
1054
- yield chat_history, session_state, thinking_html("Understanding your message"), "Starting...", ""
1055
- time.sleep(0.4)
 
 
 
 
 
 
 
 
 
 
 
 
1056
 
1057
- yield chat_history, session_state, thinking_html("Detecting whether this is normal chat or ECG reasoning"), "Detecting intent...", ""
1058
- time.sleep(0.4)
 
 
 
 
 
 
1059
 
1060
- detected = classify_intent(user_message)
1061
- if detected == "NORMAL_CHAT":
1062
- yield chat_history, session_state, thinking_html("Normal chatbot mode active"), "Running normal chat...", ""
1063
- time.sleep(0.4)
1064
- else:
1065
- yield chat_history, session_state, thinking_html("ECG mode detected: retrieving evidence"), "Retrieving ECG evidence...", ""
1066
- time.sleep(0.45)
1067
- yield chat_history, session_state, thinking_html("Running local ECG adapter reasoning"), "Running local reasoning...", ""
1068
- time.sleep(0.45)
1069
- yield chat_history, session_state, thinking_html("Generating multiple summaries and composing final answer"), "Generating final answer...", ""
1070
- time.sleep(0.45)
1071
-
1072
- out = run_agentic_turn(user_message, session_state)
1073
- result = out["result"]
1074
- updated_session = out["session_state"]
1075
- answer = result.get("final_answer", "I could not generate an answer.")
1076
- sources = format_sources(result)
1077
- prog = result.get("progress_text", "No progress yet.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1078
 
1079
  if cfg.enable_typewriter_stream:
1080
- for partial in stream_text(answer, step=140):
1081
- chat_history = update_last_assistant_message(chat_history, partial, title="Answer")
1082
- yield chat_history, updated_session, "", prog, sources
 
 
 
 
 
 
 
 
 
 
1083
 
1084
- chat_history = update_last_assistant_message(chat_history, answer, title="Answer")
1085
- yield chat_history, updated_session, "", prog, sources
 
 
 
1086
 
 
 
 
 
 
 
 
1087
 
1088
- # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1089
  # APP
1090
- # ============================================================
1091
- with gr.Blocks(title="Agentic ECG Chatbot", css=CUSTOM_CSS) as demo:
1092
- gr.HTML(header_html())
 
 
 
 
 
 
 
 
 
 
 
1093
 
1094
  session_state = gr.State(initialize_session())
1095
 
1096
- chatbot = gr.Chatbot(
1097
- label="Chat",
1098
- elem_id="chatbot",
1099
- type="messages",
1100
- show_copy_button=True,
1101
- bubble_full_width=False,
1102
- )
 
 
 
 
1103
 
1104
- user_box = gr.Textbox(
1105
- label="Message",
1106
- placeholder="Ask anything. ECG / cardiology questions are detected automatically.",
1107
- lines=2,
1108
- autofocus=True,
1109
- )
1110
 
1111
- status_html = gr.HTML("")
1112
 
1113
- with gr.Row():
1114
- send_btn = gr.Button("Submit", variant="primary")
1115
- clear_btn = gr.Button("Clear")
 
1116
 
1117
- with gr.Accordion("Progress Log", open=False):
1118
- progress_panel = gr.Textbox(value="No progress yet.", lines=16, interactive=False)
 
 
 
 
 
1119
 
1120
- with gr.Accordion("Retrieved ECG Sources", open=False):
1121
- sources_panel = gr.Textbox(value="No sources yet.", lines=16, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1122
 
1123
  submit_event = user_box.submit(
1124
  fn=user_submit,
@@ -1126,33 +1676,41 @@ with gr.Blocks(title="Agentic ECG Chatbot", css=CUSTOM_CSS) as demo:
1126
  outputs=[user_box, chatbot],
1127
  queue=True,
1128
  )
 
1129
  submit_event.then(
1130
  fn=bot_respond_stream,
1131
  inputs=[chatbot, session_state],
1132
- outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
1133
  queue=True,
1134
  )
1135
 
1136
- send_event = send_btn.click(
1137
  fn=user_submit,
1138
  inputs=[user_box, chatbot],
1139
  outputs=[user_box, chatbot],
1140
  queue=True,
1141
  )
1142
- send_event.then(
 
1143
  fn=bot_respond_stream,
1144
  inputs=[chatbot, session_state],
1145
- outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
1146
  queue=True,
1147
  )
1148
 
1149
  clear_btn.click(
1150
  fn=clear_chat,
1151
  inputs=[],
1152
- outputs=[chatbot, session_state, status_html, progress_panel, sources_panel],
1153
  queue=False,
1154
  )
1155
 
 
 
 
 
 
 
1156
 
1157
  demo.queue(default_concurrency_limit=1)
1158
 
@@ -1161,4 +1719,4 @@ if __name__ == "__main__":
1161
  debug=cfg.launch_debug,
1162
  server_name=cfg.server_name,
1163
  server_port=cfg.server_port,
1164
- )
 
1
  import os
2
  import re
3
  import time
 
 
 
 
4
  import traceback
5
+ import logging
6
+ from typing import List, Dict, TypedDict, Optional
7
  from dataclasses import dataclass, field
8
 
9
  import torch
 
17
  from langchain_huggingface import HuggingFaceEmbeddings
18
  from langchain_community.vectorstores import FAISS
19
  from langchain_openai import ChatOpenAI
20
+ from langgraph.graph import StateGraph, START, END
21
 
22
  # ============================================================
23
+ # HUGGING FACE SPACES READY
24
+ # Medical CSV RAG Chatbot
25
+ # Mobile-friendly UI/UX version
26
+ # Pipeline: RAG retrieval -> local ECG adapter reasoning -> grounded summary
 
 
 
 
 
 
27
  # ============================================================
28
 
29
+ # -------------------------------
 
 
 
 
30
  # LOGGING
31
+ # -------------------------------
32
  logging.basicConfig(
33
  level=logging.INFO,
34
+ format="%(asctime)s - %(levelname)s - %(message)s"
35
  )
36
+ logger = logging.getLogger(__name__)
37
 
38
 
39
+ # -------------------------------
40
  # CONFIG
41
+ # -------------------------------
42
  @dataclass
43
  class Config:
44
+ base_model_path: str = os.getenv(
45
+ "BASE_MODEL_PATH",
46
+ "meta-llama/Llama-3.1-8B-Instruct"
47
+ )
48
+
49
+ adapter_dir: str = os.getenv(
50
+ "ADAPTER_DIR",
51
+ "adapter_refined_v10"
52
+ )
53
+ data_csv: str = os.getenv(
54
+ "DATA_CSV",
55
+ "RAGmaterials/ECG_RAG_only_clean.csv"
56
+ )
57
+ rag_dir: str = os.getenv(
58
+ "RAG_DIR",
59
+ "RAGmaterials"
60
+ )
61
  vectorstore_dir: str = field(init=False)
62
 
63
  hf_token: str = os.getenv("HF_TOKEN", "")
 
65
  deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
66
  deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
67
 
68
+ deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.1"))
69
+ deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "550"))
70
+
71
+ embed_model_name: str = os.getenv(
72
+ "EMBED_MODEL_NAME",
73
+ "sentence-transformers/all-MiniLM-L6-v2"
74
+ )
75
 
76
+ similarity_k: int = int(os.getenv("SIMILARITY_K", "12"))
77
  top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
78
+ max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5200"))
79
 
80
  max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
81
+ max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "180"))
82
  max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
83
 
84
+ min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.08"))
85
+ min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.20"))
86
+ strong_retrieval_threshold: float = float(os.getenv("STRONG_RETRIEVAL_THRESHOLD", "0.30"))
87
+ strong_retrieval_min_docs: int = int(os.getenv("STRONG_RETRIEVAL_MIN_DOCS", "3"))
 
88
 
89
+ use_query_cache: bool = os.getenv("USE_QUERY_CACHE", "true").lower() == "true"
90
  enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
91
+ enable_validator: bool = os.getenv("ENABLE_VALIDATOR", "true").lower() == "true"
92
  enable_typewriter_stream: bool = os.getenv("ENABLE_TYPEWRITER_STREAM", "true").lower() == "true"
93
+ show_debug_panel: bool = os.getenv("SHOW_DEBUG_PANEL", "true").lower() == "true"
94
  allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
95
 
96
+ use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
97
+
98
  launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
99
  server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
100
  server_port: int = int(os.getenv("SERVER_PORT", "7860"))
101
 
102
+ blink_stage_1: float = float(os.getenv("BLINK_STAGE_1", "0.40"))
103
+ blink_stage_2: float = float(os.getenv("BLINK_STAGE_2", "0.55"))
104
+ blink_stage_3: float = float(os.getenv("BLINK_STAGE_3", "0.50"))
105
+ blink_before_answer: float = float(os.getenv("BLINK_BEFORE_ANSWER", "0.25"))
106
+
107
  def __post_init__(self):
108
  self.vectorstore_dir = os.path.join(self.rag_dir, "faiss_store")
109
  os.makedirs(self.rag_dir, exist_ok=True)
110
 
111
  if not self.deepseek_api_key:
112
+ raise ValueError("Missing DEEPSEEK_API_KEY. Add it in Hugging Face Space Secrets.")
 
 
 
113
 
114
  for path, name in [
115
  (self.adapter_dir, "Adapter directory"),
 
123
  logger.info("Configuration loaded.")
124
 
125
 
126
+ # -------------------------------
127
  # PROMPTS
128
+ # -------------------------------
129
+ LOCAL_REASONING_SYSTEM = """
130
+ You are a strict medical reasoning assistant specialized for ECG and cardiology reasoning.
 
 
 
 
131
 
132
+ You are NOT the final answer generator.
133
+ You must analyze ONLY the supplied evidence and produce a short structured reasoning draft.
 
 
 
134
 
 
 
135
  Rules:
136
+ 1) Use only the provided evidence.
137
+ 2) Do not invent facts.
138
+ 3) Focus only on the user's exact question.
139
+ 4) Output exactly in this structure:
 
 
 
 
 
 
 
 
 
140
 
141
  KEY_FINDINGS:
142
  - ...
 
153
  LIMITS:
154
  - ...
155
 
156
+ 5) If evidence is insufficient, output exactly:
157
  INSUFFICIENT_EVIDENCE
158
  """.strip()
159
 
160
+ QUERY_EXPANSION_SYSTEM = """
161
+ You expand medical queries for retrieval.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
+ Rules:
164
+ 1) Preserve the user's intent.
165
+ 2) Add close medical paraphrases and alternate wording.
166
+ 3) Add likely medical synonyms, abbreviations, and alternate phrasing.
167
+ 4) Do not answer the question.
168
+ 5) Output only the expanded retrieval query.
169
  """.strip()
170
 
171
+ DEEPSEEK_SUMMARY_SYSTEM = """
172
+ You are an expert medical evidence summarizer.
 
 
 
 
 
 
 
 
173
 
174
+ Your job is to produce a clinically precise, well-structured answer grounded ONLY in:
175
+ 1. the retrieved evidence
176
+ 2. the local reasoning draft
177
 
178
+ You must be faithful to the provided material and answer the user's question directly, clearly, and conservatively.
 
179
 
180
+ PRIMARY OBJECTIVE
181
+ - Identify the user's main intent before writing:
182
+ definition, cause, symptoms, diagnosis, investigation, treatment, prognosis, or genetics.
183
+ - Prioritize that intent throughout the response.
184
+ - The first sentence of the Summary must directly answer the user's question in the most clinically relevant way.
185
 
186
+ GROUNDING RULES
187
+ - Use only information supported by the retrieved evidence and local reasoning draft.
188
+ - Do not add outside medical knowledge.
189
+ - Do not infer specific facts unless they are clearly supported.
190
+ - Do not invent treatments, diagnoses, risks, mechanisms, thresholds, statistics, timelines, monitoring plans, or prognosis details.
191
+ - If the evidence is incomplete, be explicit about what is missing.
192
+ - If the evidence is too weak to answer the question reliably, output exactly:
193
  INSUFFICIENT_EVIDENCE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
+ STYLE RULES
196
+ - Write in precise, professional clinical language.
197
+ - Be specific, not vague.
198
+ - Be concise, but fully informative.
199
+ - Avoid repetition, generic filler, and empty statements.
200
+ - Do not mention retrieval, prompts, system instructions, reasoning drafts, tools, pipelines, or internal processes.
201
+ - Do not include URLs or citations unless explicitly requested elsewhere.
202
+ - Do not overstate certainty.
203
+ - When appropriate, distinguish clearly between what is established, what is suggested, and what is not addressed by the evidence.
204
 
205
+ OUTPUT FORMAT
 
206
 
207
+ ### Summary
208
+ - Write 4 to 7 full sentences.
209
+ - This is the most important section.
210
+ - The first sentence must directly answer the user's question.
211
+ - Focus primarily on the user's main intent.
212
+ - Include only background information that improves understanding of the requested topic.
213
+ - Make the summary clinically useful, specific, and evidence-faithful.
214
 
215
+ ### Key Evidence Points
216
+ - Include 4 to 6 bullet points.
217
+ - Each bullet must state a concrete fact supported by the evidence.
218
+ - Prioritize clinically important facts over background detail.
219
+ - Avoid repeating the same idea in different words.
220
+
221
+ ### Clinical Implications / Recommendations
222
+ - Include 2 to 4 bullet points only if supported by the evidence.
223
+ - Focus on practical interpretation, management implications, follow-up considerations, or next steps.
224
+ - If the evidence supports recognition or framing rather than action, say that clearly.
225
+ - Do not recommend interventions not supported by the evidence.
226
+
227
+ ### Limitations of the Evidence
228
+ - State clearly what the evidence does not establish, does not cover, or leaves uncertain.
229
+ - Explicitly note when details are lacking on:
230
+ treatment, diagnosis, prognosis, genetics, monitoring, recurrence prevention, comparative effectiveness, or long-term outcomes.
231
+ - If the evidence is narrow, low-detail, or only partially aligned with the question, say so plainly.
232
+
233
+ SPECIAL INSTRUCTIONS BY QUESTION TYPE
234
+
235
+ For treatment questions:
236
+ - Focus primarily on treatment and management, not disease definition.
237
+ - Organize treatment information in this order whenever supported by the evidence:
238
+ 1. supportive or conservative care
239
+ 2. symptomatic drug therapy or procedural treatment
240
+ 3. long-term prevention, follow-up, or recurrence prevention
241
+ - Distinguish treatment of active symptoms from prevention of recurrence or complications.
242
+ - If the condition is benign, self-limited, or often does not require treatment, state that clearly in the first sentence.
243
+
244
+ For diagnosis or investigation questions:
245
+ - Focus on how the condition is identified, evaluated, or differentiated.
246
+ - Prioritize diagnostic features, testing approach, and clinically useful distinctions.
247
+ - Do not drift into treatment unless the evidence clearly supports it and it helps answer the question.
248
+
249
+ For cause or risk questions:
250
+ - Focus on etiologies, risk factors, mechanisms, or associations supported by the evidence.
251
+ - Distinguish established causes from possible contributors if the evidence is less certain.
252
+
253
+ For prognosis questions:
254
+ - Focus on expected course, complications, recurrence, or outcome-related information supported by the evidence.
255
+ - Do not add prognostic claims not explicitly supported.
256
+
257
+ QUALITY CHECK BEFORE OUTPUT
258
+ Before finalizing, ensure that:
259
+ - the first sentence directly answers the question
260
+ - the response matches the user's primary intent
261
+ - every important claim is grounded in the provided material
262
+ - no unsupported medical detail has been added
263
+ - the Limitations section honestly reflects evidence gaps
264
+
265
+ If these conditions cannot be met, output exactly:
266
  INSUFFICIENT_EVIDENCE
267
  """.strip()
268
 
269
+ VALIDATOR_SYSTEM = """
270
+ You are a strict medical evidence validator.
271
+
272
+ Your job is to compare the ANSWER against the EVIDENCE.
273
+
274
+ Rules:
275
+ 1) Mark SUPPORTED if the answer is well grounded in the evidence.
276
+ 2) Mark PARTLY_UNSUPPORTED if some claims are supported but others go beyond the evidence.
277
+ 3) Mark INSUFFICIENT_EVIDENCE if the answer is mostly unsupported or the evidence is too weak.
278
+ 4) Output only one short verdict line beginning with exactly one of:
279
+ SUPPORTED:
280
+ PARTLY_UNSUPPORTED:
281
+ INSUFFICIENT_EVIDENCE:
282
  """.strip()
283
 
284
 
285
+ # -------------------------------
286
  # HELPERS
287
+ # -------------------------------
288
  def clean_text(x: str) -> str:
289
  x = str(x).replace("\x00", " ").strip()
290
  x = re.sub(r"\s+", " ", x)
 
293
 
294
  def strip_bad_sections(txt: str) -> str:
295
  t = str(txt).strip()
296
+ cut_markers = [
297
+ "References:",
298
+ "Sources:",
299
+ "Source:",
300
+ "URLs:",
301
+ "This response is based",
302
+ "Please let me know",
303
+ "Is there anything else",
304
+ ]
305
+ for marker in cut_markers:
306
+ pos = t.lower().find(marker.lower())
307
+ if pos != -1:
308
+ t = t[:pos].strip()
309
+
310
  t = re.sub(r"https?://\S+|www\.\S+", "", t).strip()
311
  return t
312
 
 
314
  def infer_tags(question: str, answer: str) -> List[str]:
315
  text = f"{question} {answer}".lower()
316
  tags: List[str] = []
317
+
318
  keyword_map = {
319
+ "treatment": ["treat", "therapy", "management", "drug", "surgery"],
320
  "diagnosis": ["diagnosis", "diagnose", "criteria"],
321
+ "symptoms": ["symptom", "presentation", "sign", "feature"],
322
+ "ecg": ["ecg", "ekg", "st elevation", "qrs", "p wave", "arrhythmia", "tachycardia", "bradycardia"],
323
+ "investigation": ["test", "investigation", "mri", "ct", "lab", "imaging"],
324
+ "prognosis": ["prognosis", "outcome", "survival", "risk"],
325
+ "genetics": ["gene", "genetic", "mutation", "variant", "chromosome", "inherited", "inheritance"],
326
+ "etiology": ["cause", "causes", "caused by", "associated with", "risk factor"],
327
  }
328
+
329
  for tag, words in keyword_map.items():
330
  if any(w in text for w in words):
331
  tags.append(tag)
332
+
333
  return tags
334
 
335
 
 
353
  return len(q_words & t_words) / max(1, len(q_words))
354
 
355
 
356
+ def rerank_docs(query: str, docs: List[Document], top_n: Optional[int] = None) -> List[Document]:
357
+ if top_n is None:
358
+ top_n = cfg.top_k_final
359
+
360
+ q_words = set(re.findall(r"\w+", query.lower()))
361
+ scored = []
362
+
363
+ for d in docs:
364
+ question = d.metadata.get("question", "")
365
+ answer = d.metadata.get("answer", "")
366
+ tags = " ".join(d.metadata.get("tags", []))
367
+ text = f"{question} {answer} {tags}".lower()
368
+
369
+ t_words = set(re.findall(r"\w+", text))
370
+ overlap = len(q_words & t_words) / max(1, len(q_words))
371
+ question_boost = 0.20 if any(w in question.lower() for w in q_words) else 0.0
372
+ tag_boost = 0.10 if any(w in tags.lower() for w in q_words) else 0.0
373
+ sim_score = float(d.metadata.get("sim_score", 0.0))
374
+
375
+ final_score = overlap + question_boost + tag_boost + (0.35 * sim_score)
376
+ scored.append((d, final_score))
377
+
378
+ scored.sort(key=lambda x: x[1], reverse=True)
379
+ return [d for d, _ in scored[:top_n]]
380
+
381
+
382
  def history_to_text(chat_history: List[Dict[str, str]], max_turns: Optional[int] = None) -> str:
383
+ if max_turns is None:
384
+ max_turns = cfg.max_chat_history_turns
385
+
386
  items = chat_history[-max_turns:]
387
  if not items:
388
  return "[EMPTY]"
389
+
390
  return "\n".join([f"{m['role'].upper()}: {m['content']}" for m in items]).strip()
391
 
392
 
393
  def build_context_string(docs: List[Document], max_chars: Optional[int] = None) -> str:
394
+ if max_chars is None:
395
+ max_chars = cfg.max_context_chars
396
+
397
  blocks = []
398
  total = 0
399
+
400
  for i, d in enumerate(docs, 1):
401
  q = d.metadata.get("question", "")
402
  a = d.metadata.get("answer", "")
403
  tags = ", ".join(d.metadata.get("tags", [])) or "N/A"
404
+ sim = d.metadata.get("sim_score", None)
405
+
406
  block = f"""
407
  ==============================
408
  EVIDENCE_ID: {i}
409
  SOURCE_ID: {d.metadata.get('id')}
410
  SOURCE_QUESTION: {q}
411
  SOURCE_TAGS: {tags}
412
+ SIMILARITY: {sim if sim is not None else 'N/A'}
413
  EVIDENCE_TEXT:
414
  {a}
415
  ==============================
416
  """.strip()
417
+
418
  if total + len(block) > max_chars:
419
  break
420
+
421
  blocks.append(block)
422
  total += len(block) + 2
 
423
 
424
+ return "\n\n".join(blocks).strip()
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
 
427
+ def compute_confidence(result: Dict) -> float:
428
+ best_score = result.get("best_score", -1.0)
429
+ validation = result.get("validation_status", "")
 
 
430
 
431
+ if validation.startswith("SUPPORTED"):
432
+ conf = best_score
433
+ elif validation.startswith("PARTLY_UNSUPPORTED"):
434
+ conf = best_score * 0.70
435
+ else:
436
+ conf = best_score * 0.40
437
 
438
+ return max(0.0, min(1.0, conf))
 
 
439
 
440
 
441
+ def strong_retrieval(best_score: float, docs: List[Document]) -> bool:
442
+ return (
443
+ best_score >= cfg.strong_retrieval_threshold
444
+ and len(docs) >= cfg.strong_retrieval_min_docs
445
+ )
 
 
 
 
446
 
447
 
448
+ def stream_text(text: str, step: int = 110):
449
+ acc = ""
450
+ for i in range(0, len(text), step):
451
+ acc += text[i:i + step]
452
+ yield acc
453
 
454
 
455
+ # -------------------------------
456
  # EMBEDDINGS + VECTORSTORE
457
+ # -------------------------------
458
  logger.info("Loading embeddings...")
459
+ embeddings = HuggingFaceEmbeddings(model_name=cfg.embed_model_name)
 
 
 
 
 
 
 
460
 
461
 
462
  def build_vectorstore():
 
484
  "question": q,
485
  "answer": a,
486
  "tags": infer_tags(q, a),
487
+ }
488
  )
489
  )
490
 
 
509
  logger.info("Vectorstore ready.")
510
 
511
 
512
+ # -------------------------------
513
+ # LOCAL MODEL + ECG ADAPTER
514
+ # -------------------------------
515
  logger.info("Loading tokenizer...")
516
  tokenizer = AutoTokenizer.from_pretrained(
517
  cfg.base_model_path,
518
  use_fast=True,
519
+ token=cfg.hf_token if cfg.hf_token else None
520
  )
521
+
522
  if tokenizer.pad_token is None:
523
  tokenizer.pad_token = tokenizer.eos_token
524
 
 
559
 
560
  base_model.eval()
561
 
562
+ logger.info("Loading ECG reasoning adapter...")
563
  reason_model = PeftModel.from_pretrained(base_model, cfg.adapter_dir)
564
  reason_model.eval()
565
 
 
 
 
 
 
 
 
 
 
566
 
567
  def get_primary_model_device(model) -> torch.device:
568
  try:
 
571
  return torch.device("cuda" if torch.cuda.is_available() else "cpu")
572
 
573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
  @torch.inference_mode()
575
  def run_local_reasoner(user_query: str, context: str) -> str:
576
  try:
577
  messages = [
578
  {"role": "system", "content": LOCAL_REASONING_SYSTEM},
579
+ {
580
+ "role": "user",
581
+ "content": f"QUESTION:\n{user_query}\n\nEVIDENCE:\n{context if context.strip() else '[EMPTY]'}"
582
+ },
583
  ]
584
 
585
  prompt = tokenizer.apply_chat_template(
 
611
 
612
  gen_ids = out[0, inputs["input_ids"].shape[1]:]
613
  text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
614
+ text = strip_bad_sections(text)
615
+
616
+ return text if text else "INSUFFICIENT_EVIDENCE"
617
+
618
  except Exception as e:
619
  logger.error(f"Local reasoner error: {e}")
620
  traceback.print_exc()
621
  return "INSUFFICIENT_EVIDENCE"
622
 
623
 
624
+ # -------------------------------
625
+ # REMOTE LLM (DEEPSEEK)
626
+ # -------------------------------
627
+ deepseek_llm = ChatOpenAI(
628
+ model=cfg.deepseek_model,
629
+ api_key=cfg.deepseek_api_key,
630
+ base_url=cfg.deepseek_base_url,
631
+ temperature=cfg.deepseek_temperature,
632
+ max_tokens=cfg.deepseek_max_tokens,
633
+ )
634
 
635
+ _query_expansion_cache: Dict[str, str] = {}
 
636
 
 
 
637
 
638
+ def llm_text(system_prompt: str, user_prompt: str, fallback: str = "INSUFFICIENT_EVIDENCE") -> str:
639
+ try:
640
+ resp = deepseek_llm.invoke([
641
+ {"role": "system", "content": system_prompt},
642
+ {"role": "user", "content": user_prompt},
643
+ ])
644
+ text = resp.content if hasattr(resp, "content") else str(resp)
645
+ text = strip_bad_sections(text)
646
+ return text if text.strip() else fallback
647
+ except Exception as e:
648
+ logger.error(f"DeepSeek error: {e}")
649
+ traceback.print_exc()
650
+ return fallback
651
 
652
 
653
+ def run_query_expansion(user_query: str) -> str:
654
+ if not cfg.enable_query_expansion:
655
+ return user_query
 
656
 
657
+ if cfg.use_query_cache and user_query in _query_expansion_cache:
658
+ logger.info(f"Using cached expansion for: {user_query[:80]}")
659
+ return _query_expansion_cache[user_query]
660
 
661
+ prompt = f"""
662
+ USER_QUERY:
663
+ {user_query}
664
 
665
+ Expand this for retrieval with close medical phrasing, synonyms, and alternate wording.
666
+ Do not answer the question.
667
  """.strip()
668
+
669
+ expanded = llm_text(QUERY_EXPANSION_SYSTEM, prompt, fallback=user_query)
670
+ expanded = expanded.strip() if expanded else user_query
671
+
672
+ if cfg.use_query_cache:
673
+ _query_expansion_cache[user_query] = expanded
674
+
675
+ return expanded
676
 
677
 
678
+ def run_deepseek_summary(
679
+ user_query: str,
680
+ context: str,
681
+ reasoning_draft: str,
682
+ chat_history: List[Dict[str, str]],
683
+ ) -> str:
684
  prompt = f"""
685
+ CHAT_HISTORY:
686
+ {history_to_text(chat_history)}
687
+
688
  USER_QUESTION:
689
  {user_query}
690
 
691
  RETRIEVED_EVIDENCE:
692
  {context if context.strip() else '[EMPTY]'}
693
 
694
+ LOCAL_REASONING_DRAFT:
695
  {reasoning_draft if reasoning_draft.strip() else '[EMPTY]'}
696
 
697
+ Write a grounded final summary answer using only the evidence and reasoning draft.
 
 
 
 
698
  """.strip()
 
699
 
700
+ return llm_text(
701
+ DEEPSEEK_SUMMARY_SYSTEM,
702
+ prompt,
703
+ fallback="I could not generate a grounded summary from the retrieved evidence."
704
+ )
705
+
706
+
707
+ def run_validator(context: str, answer: str) -> str:
708
+ if not cfg.enable_validator:
709
+ return "SUPPORTED (validator disabled)"
710
 
 
711
  prompt = f"""
712
+ EVIDENCE:
713
+ {context if context.strip() else '[EMPTY]'}
714
 
715
+ ANSWER:
716
+ {answer if answer.strip() else '[EMPTY]'}
717
  """.strip()
 
718
 
719
+ return llm_text(VALIDATOR_SYSTEM, prompt, fallback="PARTLY_UNSUPPORTED: validator unavailable")
720
 
721
+
722
+ # -------------------------------
723
  # WARMUP
724
+ # -------------------------------
725
  def warmup_models():
726
  logger.info("Warming up local reasoner...")
727
  try:
 
732
  EVIDENCE_ID: 1
733
  SOURCE_QUESTION: What are ECG findings in hyperkalemia?
734
  SOURCE_TAGS: ecg
 
735
  EVIDENCE_TEXT:
736
  Hyperkalemia may cause peaked T waves, PR prolongation, QRS widening, and severe conduction abnormalities.
737
  ==============================
 
742
  logger.warning(f"Warmup failed: {e}")
743
 
744
 
745
+ warmup_models()
 
746
 
747
 
748
+ # -------------------------------
749
  # STATE
750
+ # -------------------------------
751
+ class ChatState(TypedDict, total=False):
752
  user_query: str
 
 
 
753
  expanded_query: str
754
+ chat_history: List[Dict[str, str]]
755
 
756
  retrieved_docs: List[Document]
757
  best_score: float
758
+ used_context: bool
759
  context: str
760
+ retrieval_attempts: int
761
+ retrieval_mode: str
762
 
763
+ reasoning_draft: str
 
 
764
  final_answer: str
765
+ validation_status: str
766
 
767
 
768
+ # -------------------------------
769
  # RETRIEVAL
770
+ # -------------------------------
771
+ def retrieve_docs_once(query_for_search: str, original_query: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
772
  try:
773
+ scored = vectorstore.similarity_search_with_score(
774
+ query_for_search,
775
+ k=cfg.similarity_k,
776
+ )
777
  except Exception as e:
778
  logger.error(f"Retriever error: {e}")
779
  traceback.print_exc()
 
783
  return [], -1.0
784
 
785
  filtered_docs = []
786
+ best_score = -1.0
787
+
788
  for doc, raw_score in scored:
789
  sim = score_to_similarity(raw_score)
790
+ best_score = max(best_score, sim)
791
+
792
  q = doc.metadata.get("question", "")
793
  a = doc.metadata.get("answer", "")
794
  ov = lexical_overlap(original_query, f"{q} {a}")
795
 
796
+ if ov >= cfg.min_lexical_overlap and sim >= cfg.min_faiss_similarity:
797
  new_doc = Document(page_content=doc.page_content, metadata=dict(doc.metadata))
798
  new_doc.metadata["sim_score"] = sim
799
  new_doc.metadata["lexical_overlap"] = ov
800
  filtered_docs.append(new_doc)
801
 
802
  reranked = rerank_docs(original_query, filtered_docs, top_n=cfg.top_k_final)
 
803
  return reranked, best_score
804
 
805
 
806
+ # -------------------------------
807
+ # LANGGRAPH NODES
808
+ # -------------------------------
809
def retrieve_node(state: ChatState) -> ChatState:
    """Run one retrieval pass and report its outcome as a state update.

    The search uses the expanded query when one is present in the state,
    otherwise the user's original query. Lexical filtering inside
    ``retrieve_docs_once`` is always done against the original query.

    Returns the retrieval keys of ``ChatState``: the kept documents, the best
    score, whether any context was found, the context string, the incremented
    attempt counter and the retrieval mode ("expanded" / "original").
    """
    original_query = state["user_query"]
    expanded_query = state.get("expanded_query")
    attempt_number = int(state.get("retrieval_attempts", 0)) + 1

    hits, top_score = retrieve_docs_once(
        query_for_search=expanded_query or original_query,
        original_query=original_query,
    )

    found_evidence = bool(hits)
    if found_evidence:
        evidence_text = build_context_string(hits, max_chars=cfg.max_context_chars)
    else:
        # Normalise "nothing kept" to an empty list / empty context.
        hits, evidence_text = [], ""

    return {
        "retrieved_docs": hits,
        "best_score": top_score,
        "used_context": found_evidence,
        "context": evidence_text,
        "retrieval_attempts": attempt_number,
        "retrieval_mode": "expanded" if expanded_query else "original",
    }
837
+
838
+
839
def should_retry_retrieval(state: ChatState) -> str:
    """Pick the next graph node after a retrieval pass.

    Returns ``"expand_query"`` only when retrieval was not good enough, query
    expansion is enabled in the config, and fewer than two attempts have been
    made. In every other case returns ``"local_reasoning"``.
    """
    retrieval_ok = (
        state.get("used_context", False)
        and state.get("best_score", -1.0) >= cfg.min_faiss_similarity
    )
    attempts_made = int(state.get("retrieval_attempts", 0))

    # A retry is allowed only with expansion enabled and at most one prior attempt.
    may_expand = cfg.enable_query_expansion and attempts_made < 2

    if retrieval_ok or not may_expand:
        return "local_reasoning"
    return "expand_query"
 
 
 
 
 
 
854
 
 
 
 
855
 
856
def expand_query_node(state: ChatState) -> ChatState:
    """Produce an expanded search query for the retry pass.

    Falls back to the original user query when the expansion comes back
    empty or whitespace-only.
    """
    original_query = state["user_query"]
    candidate = run_query_expansion(original_query)
    return {"expanded_query": candidate if candidate.strip() else original_query}
861
 
 
 
 
 
 
 
 
 
 
862
 
863
def local_reasoning_node(state: ChatState) -> ChatState:
    """Draft a reasoning note from the retrieved context with the local reasoner.

    When there is no context the draft is the literal marker
    ``"INSUFFICIENT_EVIDENCE"`` and the reasoner is not called.
    """
    evidence = state.get("context", "").strip()
    if evidence:
        draft = run_local_reasoner(state["user_query"], evidence)
    else:
        draft = "INSUFFICIENT_EVIDENCE"
    return {"reasoning_draft": draft}
 
870
 
 
 
871
 
872
def generate_node(state: ChatState) -> ChatState:
    """Generate the answer text from the context, reasoning draft and history.

    Without any context a fixed "no evidence" message is returned and the
    summarizer is not called.
    """
    evidence = state.get("context", "").strip()

    if evidence:
        reply = run_deepseek_summary(
            user_query=state["user_query"],
            context=evidence,
            reasoning_draft=state.get("reasoning_draft", "INSUFFICIENT_EVIDENCE"),
            chat_history=state.get("chat_history", []),
        )
    else:
        reply = "I could not find sufficiently relevant evidence in the RAG database for this question."

    return {"final_answer": reply}
887
+
888
+
889
def validate_node(state: ChatState) -> ChatState:
    """Check the generated answer against the retrieved context.

    * Missing context or answer -> status "INSUFFICIENT_EVIDENCE: ...".
    * Strong retrieval -> the validator is skipped and the answer is marked
      supported.
    * Otherwise the validator's verdict becomes the status; for the
      "PARTLY_UNSUPPORTED" and "INSUFFICIENT_EVIDENCE" verdicts an
      "Evidence limits" caveat is appended to the answer.
    """
    evidence = state.get("context", "").strip()
    draft_answer = state.get("final_answer", "").strip()

    if not (evidence and draft_answer):
        return {"validation_status": "INSUFFICIENT_EVIDENCE: missing context or answer"}

    if strong_retrieval(state.get("best_score", -1.0), state.get("retrieved_docs", [])):
        return {"validation_status": "SUPPORTED (validator skipped due to strong retrieval)"}

    verdict = run_validator(evidence, draft_answer)
    update = {"validation_status": verdict}

    # Verdict prefix -> caveat appended to the answer. "SUPPORTED" is checked
    # first and never receives a caveat.
    caveats = (
        (
            "PARTLY_UNSUPPORTED",
            "\n\nEvidence limits: some parts may not be fully supported by the retrieved evidence.",
        ),
        (
            "INSUFFICIENT_EVIDENCE",
            "\n\nEvidence limits: the retrieved evidence was weak or only partially relevant.",
        ),
    )
    if not verdict.startswith("SUPPORTED"):
        for prefix, note in caveats:
            if verdict.startswith(prefix):
                update["final_answer"] = draft_answer + note
                break

    return update
919
+
920
+
921
def finalize_node(state: ChatState) -> ChatState:
    """Clean the answer with ``strip_bad_sections`` and guarantee it is non-empty."""
    cleaned = strip_bad_sections(state.get("final_answer", ""))
    return {"final_answer": cleaned or "I could not generate an answer."}
926
+
927
+
928
+ # -------------------------------
929
+ # GRAPH
930
+ # -------------------------------
931
# Assemble the LangGraph pipeline:
#   START -> retrieve -> (expand_query -> retrieve)? -> local_reasoning
#         -> generate -> validate -> finalize -> END
builder = StateGraph(ChatState)

for node_name, node_fn in (
    ("retrieve", retrieve_node),
    ("expand_query", expand_query_node),
    ("local_reasoning", local_reasoning_node),
    ("generate", generate_node),
    ("validate", validate_node),
    ("finalize", finalize_node),
):
    builder.add_node(node_name, node_fn)

builder.add_edge(START, "retrieve")

# After retrieval, either retry with an expanded query or move on to reasoning.
builder.add_conditional_edges(
    "retrieve",
    should_retry_retrieval,
    {
        "expand_query": "expand_query",
        "local_reasoning": "local_reasoning",
    },
)

for source_node, target_node in (
    ("expand_query", "retrieve"),
    ("local_reasoning", "generate"),
    ("generate", "validate"),
    ("validate", "finalize"),
    ("finalize", END),
):
    builder.add_edge(source_node, target_node)

graph = builder.compile()
logger.info("LangGraph compiled.")
956
 
 
 
957
 
958
+ # -------------------------------
959
+ # FORMATTING HELPERS
960
+ # -------------------------------
961
def format_sources_minimal(result: Optional[Dict]) -> str:
    """Render the retrieved evidence of a chat turn as Markdown.

    Args:
        result: Result dict of a chat turn (reads ``retrieved_docs`` and
            ``best_score``), or ``None`` / empty when no turn has run yet.

    Returns:
        A Markdown string with a "Retrieved Sources" heading, the best score
        and, per document, its question, similarity and an answer preview of
        at most 210 characters.

    Scores are shown with three decimals everywhere (the per-document
    similarity used to be printed as a raw float). Missing or ``None`` scores
    and answers are rendered as ``N/A`` / an empty preview instead of raising.
    """
    header = "## Retrieved Sources"
    preview_limit = 210

    if not result:
        return f"{header}\n\nNo sources yet."

    def _fmt_score(value) -> str:
        # float() also accepts numpy scalars, which are not `float` instances.
        try:
            return f"{float(value):.3f}"
        except (TypeError, ValueError):
            return "N/A" if value is None else str(value)

    docs = result.get("retrieved_docs") or []
    best_line = f"**Best score:** `{_fmt_score(result.get('best_score', -1.0))}`"

    if not docs:
        return f"{header}\n\nNo sufficiently relevant evidence retrieved.\n\n{best_line}"

    lines = [header, best_line, ""]
    for index, doc in enumerate(docs, 1):
        question = doc.metadata.get("question", "")
        # Metadata values may be None or non-string; coerce before slicing.
        answer = str(doc.metadata.get("answer") or "")
        preview = answer[:preview_limit].strip()
        if len(answer) > preview_limit:
            preview += "..."

        lines.extend([
            f"### Evidence {index}",
            f"- **Question:** {question}",
            f"- **Similarity:** `{_fmt_score(doc.metadata.get('sim_score'))}`",
            f"- **Preview:** {preview}",
            "",
        ])

    return "\n".join(lines)
998
+
999
+
1000
def format_debug_text(result: Optional[Dict]) -> str:
    """Render the debug view of a chat turn as plain text.

    Shows the retrieval statistics and validation status followed by the
    context and the local reasoning draft. Returns a placeholder message when
    no result is available.
    """
    if not result:
        return "No debug result yet."

    read = result.get
    report_lines = [
        f"BEST SCORE: {read('best_score', -1.0)}",
        f"USED CONTEXT: {read('used_context', False)}",
        f"RETRIEVAL ATTEMPTS: {read('retrieval_attempts', 0)}",
        f"RETRIEVAL MODE: {read('retrieval_mode', 'N/A')}",
        f"VALIDATION STATUS: {read('validation_status', 'N/A')}",
        "",
        "----- CONTEXT -----",
        f"{read('context', '')}",
        "",
        "----- LOCAL REASONING DRAFT -----",
        f"{read('reasoning_draft', '')}",
    ]
    # Surrounding whitespace (including a trailing blank draft) is trimmed.
    return "\n".join(report_lines).strip()
1017
+
1018
+
1019
+ # -------------------------------
1020
  # UI HELPERS
1021
+ # -------------------------------
1022
  CUSTOM_CSS = """
1023
+ :root {
1024
+ --bg-main: #07111f;
1025
+ --bg-soft: #0b1728;
1026
+ --card: rgba(10, 19, 35, 0.86);
1027
+ --card-2: rgba(14, 25, 43, 0.94);
1028
+ --border: rgba(148, 163, 184, 0.16);
1029
+ --text: #e5eefb;
1030
+ --muted: #94a3b8;
1031
+ --primary: #7c3aed;
1032
+ --primary-2: #2563eb;
1033
+ --success: #10b981;
1034
+ }
1035
+
1036
  html, body, .gradio-container {
1037
  margin: 0 !important;
1038
  padding: 0 !important;
1039
+ min-height: 100%;
1040
+ background:
1041
+ radial-gradient(circle at top left, rgba(124,58,237,0.22), transparent 28%),
1042
+ radial-gradient(circle at top right, rgba(37,99,235,0.18), transparent 24%),
1043
+ linear-gradient(180deg, #050b16 0%, #091321 100%);
1044
+ color: var(--text);
1045
  }
1046
+
1047
  .gradio-container {
1048
+ max-width: 100% !important;
1049
+ padding: 12px !important;
 
1050
  }
1051
+
1052
+ footer {
1053
+ visibility: hidden;
1054
+ }
1055
+
1056
+ .top-card {
1057
+ border: 1px solid var(--border);
1058
+ background: linear-gradient(135deg, rgba(11,23,40,0.95), rgba(18,31,56,0.92));
1059
+ border-radius: 22px;
1060
  padding: 16px;
1061
  margin-bottom: 12px;
1062
+ box-shadow: 0 14px 40px rgba(0,0,0,0.20);
1063
  }
1064
+
1065
+ .hero-title {
1066
+ font-size: 1.6rem;
1067
  font-weight: 800;
1068
+ color: #f8fbff;
1069
  margin-bottom: 6px;
1070
+ line-height: 1.15;
1071
  }
1072
+
1073
+ .hero-subtitle {
1074
  color: #cbd5e1;
1075
+ font-size: 0.95rem;
1076
+ line-height: 1.5;
1077
+ }
1078
+
1079
+ .badges {
1080
+ display: flex;
1081
+ gap: 8px;
1082
+ flex-wrap: wrap;
1083
+ margin-top: 12px;
1084
+ }
1085
+
1086
+ .badge {
1087
+ display: inline-flex;
1088
+ align-items: center;
1089
+ gap: 6px;
1090
+ padding: 6px 10px;
1091
+ border-radius: 999px;
1092
+ font-size: 11px;
1093
+ color: #e6eefc;
1094
+ border: 1px solid rgba(255,255,255,0.12);
1095
+ background: rgba(255,255,255,0.06);
1096
+ }
1097
+
1098
+ .panel-wrap {
1099
+ border: 1px solid var(--border);
1100
+ background: linear-gradient(180deg, rgba(10,19,35,0.96), rgba(7,14,26,0.94));
1101
+ border-radius: 20px;
1102
+ padding: 12px;
1103
+ box-shadow: 0 16px 45px rgba(0,0,0,0.22);
1104
  }
1105
+
1106
  #chatbot {
1107
+ height: min(62vh, 640px) !important;
1108
+ min-height: 360px !important;
1109
  border-radius: 18px !important;
1110
+ border: 1px solid var(--border) !important;
1111
+ overflow: hidden !important;
1112
+ box-shadow: 0 14px 40px rgba(0,0,0,0.26) !important;
1113
  }
1114
+
1115
+ .status-card {
 
 
1116
  padding: 12px 14px;
1117
+ border-radius: 16px;
1118
+ background: linear-gradient(135deg, #0f172a 0%, #172554 100%);
1119
+ color: #f9fafb;
1120
+ font-size: 14px;
1121
+ border: 1px solid rgba(255,255,255,0.12);
1122
+ box-shadow: 0 10px 30px rgba(0,0,0,0.2);
1123
  }
1124
+
1125
+ .muted {
1126
+ color: #a5b4fc;
1127
+ font-size: 12px;
1128
+ }
1129
+
1130
+ .blink-dots {
1131
+ font-size: 22px;
1132
  font-weight: 800;
1133
+ letter-spacing: 4px;
1134
  animation: blinkDots 1s steps(1, end) infinite;
1135
+ display: inline-block;
1136
+ padding: 2px 0;
1137
  }
1138
+
1139
  @keyframes blinkDots {
1140
  0% { opacity: 1; }
1141
+ 50% { opacity: 0.15; }
1142
  100% { opacity: 1; }
1143
  }
1144
+
1145
  textarea, .gr-textbox textarea {
1146
+ border-radius: 16px !important;
1147
+ font-size: 15px !important;
1148
+ }
1149
+
1150
+ .gr-textbox label, .gr-markdown, .gr-button {
1151
+ font-size: 14px !important;
1152
  }
1153
+
1154
  button {
1155
  border-radius: 14px !important;
1156
  min-height: 44px !important;
1157
  font-weight: 600 !important;
1158
  }
1159
+
1160
+ .mobile-stack {
1161
+ display: flex;
1162
+ flex-direction: column;
1163
+ gap: 12px;
1164
+ }
1165
+
1166
+ .mobile-scroll {
1167
+ max-height: 34vh;
1168
+ overflow-y: auto;
1169
+ }
1170
+
1171
+ .command-note {
1172
+ color: #cbd5e1;
1173
+ font-size: 0.88rem;
1174
+ line-height: 1.45;
1175
+ }
1176
+
1177
+ @media (max-width: 1024px) {
1178
+ .gradio-container {
1179
+ padding: 10px !important;
1180
+ }
1181
+
1182
+ .hero-title {
1183
+ font-size: 1.45rem;
1184
+ }
1185
+
1186
+ .hero-subtitle {
1187
+ font-size: 0.92rem;
1188
+ }
1189
+
1190
+ #chatbot {
1191
+ height: 56vh !important;
1192
+ }
1193
+ }
1194
+
1195
+ @media (max-width: 768px) {
1196
+ .gradio-container {
1197
+ padding: 8px !important;
1198
+ }
1199
+
1200
+ .top-card {
1201
+ padding: 14px;
1202
+ border-radius: 18px;
1203
+ }
1204
+
1205
+ .hero-title {
1206
+ font-size: 1.28rem;
1207
+ }
1208
+
1209
+ .hero-subtitle {
1210
+ font-size: 0.88rem;
1211
+ line-height: 1.45;
1212
+ }
1213
+
1214
+ .badge {
1215
+ font-size: 10px;
1216
+ padding: 5px 8px;
1217
+ }
1218
+
1219
+ .panel-wrap {
1220
+ padding: 10px;
1221
+ border-radius: 16px;
1222
+ }
1223
+
1224
+ #chatbot {
1225
+ height: 52vh !important;
1226
+ min-height: 320px !important;
1227
+ border-radius: 16px !important;
1228
+ }
1229
+
1230
+ button {
1231
+ width: 100% !important;
1232
+ }
1233
+
1234
+ .mobile-scroll {
1235
+ max-height: 240px;
1236
+ }
1237
+ }
1238
+
1239
+ @media (max-width: 480px) {
1240
+ .hero-title {
1241
+ font-size: 1.15rem;
1242
+ }
1243
+
1244
+ .hero-subtitle {
1245
+ font-size: 0.83rem;
1246
+ }
1247
+
1248
+ #chatbot {
1249
+ height: 50vh !important;
1250
+ min-height: 300px !important;
1251
+ }
1252
+
1253
+ textarea, .gr-textbox textarea {
1254
+ font-size: 14px !important;
1255
+ }
1256
+ }
1257
  """
1258
 
1259
 
1260
def hero_html() -> str:
    """Return the static HTML for the header card: title, subtitle and badges.

    The CSS classes used here (top-card, hero-title, hero-subtitle, badges,
    badge) are styled by CUSTOM_CSS.
    """
    return """
    <div class="top-card">
        <div class="hero-title">🫀 Mr Cardio</div>
        <div class="hero-subtitle">
            ECG-focused clinical chatbot with RAG retrieval, local ECG reasoning,
            and grounded evidence summaries. Mobile-friendly layout included.
        </div>
        <div class="badges">
            <div class="badge">ECG Reasoning</div>
            <div class="badge">FAISS Retrieval</div>
            <div class="badge">LoRA Adapter</div>
            <div class="badge">Validated Output</div>
        </div>
    </div>
    """
 
1277
 
1278
def thinking_html(stage: str) -> str:
    """Return the HTML status card displayed while a request is in progress.

    Args:
        stage: Label of the current step. It is interpolated into the markup
            as-is (no HTML escaping), so callers should pass trusted text.
    """
    return f"""
    <div class="status-card">
        <div style="display:flex;align-items:center;gap:12px;">
            <div style="font-size:19px;"></div>
            <div>
                <div style="font-weight:700;">{stage}</div>
                <div class="muted">Retrieval → reasoning → grounded answer</div>
                <div class="blink-dots">...</div>
            </div>
        </div>
    </div>
    """
1291
 
1292
 
1293
def initialize_session():
    """Return a fresh per-session memory dict: empty history, no last result."""
    return dict(chat_history=[], last_result=None)
1295
+
1296
+
1297
def add_assistant_placeholder(history, text="..."):
    """Append a temporary assistant message titled "Thinking" and return the history.

    A non-empty ``history`` list is extended in place; ``None`` or an empty
    list yields a new list.
    """
    messages = history if history else []
    placeholder = dict(
        role="assistant",
        content=text,
        metadata=dict(title="Thinking"),
    )
    messages.append(placeholder)
    return messages
1305
 
1306
 
1307
def update_last_assistant_message(history, text, title=None):
    """Set the trailing assistant message to ``text`` and return the history.

    If the last entry is an assistant message it is replaced by a new message
    dict; otherwise (empty history or trailing user message) the new message
    is appended. ``metadata`` is attached only when ``title`` is truthy.
    """
    messages = history or []

    message = {"role": "assistant", "content": text}
    if title:
        message["metadata"] = {"title": title}

    if messages and messages[-1]["role"] == "assistant":
        messages[-1] = message
    else:
        messages.append(message)
    return messages
1320
 
1321
 
1322
def user_submit(user_message, chat_ui_history):
    """Move the textbox content into the chat history.

    Returns ``("", history)``: the empty string clears the textbox. Blank or
    ``None`` input leaves the history unchanged.
    """
    messages = chat_ui_history or []
    cleaned = (user_message or "").strip()

    if cleaned:
        messages.append({"role": "user", "content": cleaned})
    return "", messages
1331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1332
 
1333
+ # -------------------------------
1334
+ # CORE CHAT
1335
+ # -------------------------------
1336
def run_chat_turn(user_message: str, memory_state: Dict) -> Dict:
    """Run one question through the graph and update the session memory.

    Args:
        user_message: The user's question.
        memory_state: Session dict with ``chat_history`` and ``last_result``;
            ``None`` starts a fresh one. It is updated in place.

    Returns:
        Dict with ``answer`` (answer text plus a metrics footer),
        ``memory_state``, ``sources_markdown`` and ``debug_text``.

    Any exception raised by the graph is logged and turned into an error
    result, so the caller always receives an answer string.
    """
    if memory_state is None:
        memory_state = {"chat_history": [], "last_result": None}

    graph_input = {
        "user_query": user_message,
        "chat_history": memory_state["chat_history"],
        "retrieval_attempts": 0,
    }

    try:
        result = graph.invoke(graph_input)
    except Exception as e:
        logger.error(f"Graph invocation error: {e}")
        traceback.print_exc()
        # Synthetic result so the formatting code below works unchanged.
        result = dict(
            final_answer=f"I hit a runtime error while processing the request: {e}",
            best_score=-1.0,
            used_context=False,
            validation_status="ERROR",
            retrieved_docs=[],
            context="",
            reasoning_draft="",
            retrieval_attempts=0,
            retrieval_mode="error",
        )

    answer = result.get("final_answer", "").strip() or "I could not generate an answer."
    top_score = result.get("best_score", -1.0)
    verdict = result.get("validation_status", "N/A")
    confidence = compute_confidence(result)

    footer = f"📊 confidence={confidence:.2f} | best_score={top_score:.3f} | validation={verdict}"
    answer_with_footer = f"{answer}\n\n---\n{footer}"

    # The footer is display-only; memory stores the bare answer and keeps
    # only the 12 most recent messages.
    turns = memory_state["chat_history"]
    turns.extend(
        [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": answer},
        ]
    )
    memory_state["chat_history"] = turns[-12:]
    memory_state["last_result"] = result

    return {
        "answer": answer_with_footer,
        "memory_state": memory_state,
        "sources_markdown": format_sources_minimal(result),
        "debug_text": format_debug_text(result),
    }
1384
+
1385
+
1386
def bot_respond_stream(chat_ui_history, session_state):
    """Stream UI updates for the newest user message in the chat.

    Generator used as a Gradio event handler. Every yielded value is the
    5-tuple ``(chat history, session state, sources markdown, debug text,
    status HTML)`` matching the handler's output components.

    Behaviour:
      * Empty history: yields the default panels and stops.
      * Last message is not a user message: yields the current panels and
        stops. This happens when Send/Enter fires with an empty textbox;
        previously the last assistant answer was re-submitted as a question.
      * ``/sources`` / ``/debug``: append the panels of the last result as an
        assistant message.
      * ``/rebuild``: rebuild and reload the vector store if the config
        allows it; a failure is logged and reported in the chat instead of
        leaving the progress card stuck.
      * Anything else: show progress stages, run ``run_chat_turn`` and emit
        the answer (optionally in typewriter chunks).

    Args:
        chat_ui_history: Chat messages as a list of ``{"role", "content"}`` dicts.
        session_state: Session memory dict, or ``None`` to start a new one.
    """
    global vectorstore

    if session_state is None:
        session_state = initialize_session()

    if not chat_ui_history:
        yield (
            chat_ui_history,
            session_state,
            "## Retrieved Sources\n\nNo sources yet.",
            "No debug result yet.",
            "",
        )
        return

    def idle_frame():
        # Frame with the panels of the last completed turn and no status card.
        last_result = session_state.get("last_result")
        return (
            chat_ui_history,
            session_state,
            format_sources_minimal(last_result),
            format_debug_text(last_result),
            "",
        )

    newest = chat_ui_history[-1]
    if newest.get("role") != "user":
        # Nothing new was submitted; never treat an assistant message as a query.
        yield idle_frame()
        return

    user_message = str(newest["content"]).strip()

    if user_message in ("/sources", "/debug"):
        last_result = session_state.get("last_result")
        if user_message == "/sources":
            content, title = format_sources_minimal(last_result), "Sources"
        else:
            content, title = format_debug_text(last_result), "Debug"
        chat_ui_history.append({
            "role": "assistant",
            "content": content,
            "metadata": {"title": title},
        })
        yield idle_frame()
        return

    if user_message == "/rebuild":
        if not cfg.allow_rebuild_vectorstore:
            chat_ui_history.append({
                "role": "assistant",
                "content": "Vector store rebuild is disabled on this Space.",
                "metadata": {"title": "Restricted"},
            })
            yield idle_frame()
            return

        busy_card = thinking_html("Rebuilding vector store")

        chat_ui_history = add_assistant_placeholder(chat_ui_history)
        yield (chat_ui_history, session_state, "", "", busy_card)

        time.sleep(cfg.blink_stage_1)

        chat_ui_history = update_last_assistant_message(
            chat_ui_history,
            "Rebuilding vector store and reloading embeddings...",
            title="Maintenance",
        )
        yield (chat_ui_history, session_state, "", "", busy_card)

        try:
            build_vectorstore()
            vectorstore = load_vectorstore()
        except Exception as exc:
            # UI boundary: report the failure rather than abort the stream.
            logger.exception("Vector store rebuild failed")
            outcome, outcome_title = f"Vector store rebuild failed: {exc}", "Error"
        else:
            outcome, outcome_title = "✅ Vector store rebuilt and reloaded.", "Done"

        chat_ui_history = update_last_assistant_message(
            chat_ui_history,
            outcome,
            title=outcome_title,
        )
        yield idle_frame()
        return

    # Regular question: show staged progress cards, then run the pipeline.
    chat_ui_history = add_assistant_placeholder(chat_ui_history, text="...")
    yield (chat_ui_history, session_state, "", "", thinking_html("Starting"))
    time.sleep(cfg.blink_stage_1)

    yield (chat_ui_history, session_state, "", "", thinking_html("Retrieving evidence"))
    time.sleep(cfg.blink_stage_2)

    yield (
        chat_ui_history,
        session_state,
        "",
        "",
        thinking_html("Running ECG adapter reasoning"),
    )
    time.sleep(cfg.blink_stage_3)

    out = run_chat_turn(user_message, session_state)

    yield (
        chat_ui_history,
        session_state,
        out["sources_markdown"],
        out["debug_text"],
        thinking_html("Generating grounded summary"),
    )
    time.sleep(cfg.blink_before_answer)

    if cfg.enable_typewriter_stream:
        for partial in stream_text(out["answer"], step=120):
            chat_ui_history = update_last_assistant_message(
                chat_ui_history,
                partial,
                title="Answer",
            )
            yield (
                chat_ui_history,
                session_state,
                out["sources_markdown"],
                out["debug_text"],
                "",
            )

    chat_ui_history = update_last_assistant_message(
        chat_ui_history,
        out["answer"],
        title="Answer",
    )

    yield (
        chat_ui_history,
        out["memory_state"],
        out["sources_markdown"],
        out["debug_text"],
        "",
    )
1560
 
1561
+
1562
def clear_chat():
    """Reset the UI: empty chat, fresh session, default panels, no status card."""
    empty_history = []
    fresh_session = initialize_session()
    return (
        empty_history,
        fresh_session,
        "## Retrieved Sources\n\nNo sources yet.",
        "No debug result yet.",
        "",
    )
1564
+
1565
+
1566
def rebuild_from_button(session_state, chatbot_history):
    """Handle the "Rebuild Store" button.

    Rebuilds and reloads the global vector store when the config allows it
    and appends an assistant message describing the outcome.

    Args:
        session_state: Session memory dict; ``None`` is replaced by a fresh
            session, consistent with ``bot_respond_stream``.
        chatbot_history: Current chat messages (may be ``None``).

    Returns:
        The 5-tuple ``(chat history, session state, sources markdown,
        debug text, status HTML)`` expected by the button's outputs.

    A rebuild failure is logged with its traceback and reported in the chat
    rather than propagating out of the handler.
    """
    global vectorstore

    if session_state is None:
        session_state = initialize_session()
    chatbot_history = chatbot_history or []

    if not cfg.allow_rebuild_vectorstore:
        content, title = "Vector store rebuild is disabled on this Space.", "Restricted"
    else:
        try:
            build_vectorstore()
            vectorstore = load_vectorstore()
        except Exception as exc:
            logger.exception("Vector store rebuild failed")
            content, title = f"Vector store rebuild failed: {exc}", "Error"
        else:
            content, title = "✅ Vector store rebuilt and reloaded.", "Done"

    chatbot_history.append({
        "role": "assistant",
        "content": content,
        "metadata": {"title": title},
    })

    last_result = session_state.get("last_result")
    return (
        chatbot_history,
        session_state,
        format_sources_minimal(last_result),
        format_debug_text(last_result),
        "",
    )
1601
+
1602
+
1603
+ # -------------------------------
1604
  # APP
1605
+ # -------------------------------
1606
+ with gr.Blocks(
1607
+ title="Medical CSV RAG Chatbot",
1608
+ css=CUSTOM_CSS,
1609
+ theme=gr.themes.Soft(
1610
+ primary_hue="indigo",
1611
+ secondary_hue="blue",
1612
+ neutral_hue="slate",
1613
+ radius_size="lg",
1614
+ text_size="md",
1615
+ ),
1616
+ ) as demo:
1617
+
1618
+ gr.HTML(hero_html())
1619
 
1620
  session_state = gr.State(initialize_session())
1621
 
1622
+ with gr.Column(elem_classes=["mobile-stack"]):
1623
+ with gr.Group(elem_classes=["panel-wrap"]):
1624
+ chatbot = gr.Chatbot(
1625
+ label="Clinical Chat",
1626
+ height=640,
1627
+ elem_id="chatbot",
1628
+ type="messages",
1629
+ show_copy_button=True,
1630
+ bubble_full_width=False,
1631
+ avatar_images=(None, None),
1632
+ )
1633
 
1634
+ user_box = gr.Textbox(
1635
+ label="Ask a medical question",
1636
+ placeholder="e.g. What are the ECG findings in hyperkalemia?",
1637
+ lines=2,
1638
+ autofocus=True,
1639
+ )
1640
 
1641
+ status_html = gr.HTML("")
1642
 
1643
+ with gr.Row():
1644
+ send_btn = gr.Button("Send", variant="primary")
1645
+ clear_btn = gr.Button("Clear")
1646
+ rebuild_btn = gr.Button("Rebuild Store")
1647
 
1648
+ gr.HTML(
1649
+ """
1650
+ <div class="command-note">
1651
+ Commands: <code>/sources</code>, <code>/debug</code>, <code>/rebuild</code>
1652
+ </div>
1653
+ """
1654
+ )
1655
 
1656
+ with gr.Accordion("Retrieved Sources", open=False):
1657
+ with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
1658
+ sources_panel = gr.Markdown("## Retrieved Sources\n\nNo sources yet.")
1659
+
1660
+ if cfg.show_debug_panel:
1661
+ with gr.Accordion("Debug Panel", open=False):
1662
+ with gr.Group(elem_classes=["panel-wrap", "mobile-scroll"]):
1663
+ debug_panel = gr.Textbox(
1664
+ label="Debug",
1665
+ value="No debug result yet.",
1666
+ lines=18,
1667
+ max_lines=28,
1668
+ interactive=False,
1669
+ )
1670
+ else:
1671
+ debug_panel = gr.Textbox(visible=False, value="")
1672
 
1673
  submit_event = user_box.submit(
1674
  fn=user_submit,
 
1676
  outputs=[user_box, chatbot],
1677
  queue=True,
1678
  )
1679
+
1680
  submit_event.then(
1681
  fn=bot_respond_stream,
1682
  inputs=[chatbot, session_state],
1683
+ outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
1684
  queue=True,
1685
  )
1686
 
1687
+ send_click = send_btn.click(
1688
  fn=user_submit,
1689
  inputs=[user_box, chatbot],
1690
  outputs=[user_box, chatbot],
1691
  queue=True,
1692
  )
1693
+
1694
+ send_click.then(
1695
  fn=bot_respond_stream,
1696
  inputs=[chatbot, session_state],
1697
+ outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
1698
  queue=True,
1699
  )
1700
 
1701
  clear_btn.click(
1702
  fn=clear_chat,
1703
  inputs=[],
1704
+ outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
1705
  queue=False,
1706
  )
1707
 
1708
+ rebuild_btn.click(
1709
+ fn=rebuild_from_button,
1710
+ inputs=[session_state, chatbot],
1711
+ outputs=[chatbot, session_state, sources_panel, debug_panel, status_html],
1712
+ queue=True,
1713
+ )
1714
 
1715
  demo.queue(default_concurrency_limit=1)
1716
 
 
1719
  debug=cfg.launch_debug,
1720
  server_name=cfg.server_name,
1721
  server_port=cfg.server_port,
1722
+ )