CanerDedeoglu
/

Rapid_ECG

@@ -1,21 +1,27 @@
 # -*- coding: utf-8 -*-
 """
-PULSE ECG Handler — Deterministic JSON→Narrative (age+sex aware)
-- Model still processes image (LLaVA/transformers)
-- output_mode="json"      → returns structured JSON (single model call)
-- output_mode="report_en" → JSON + table + narrative (derived deterministically from JSON; still single model call)
-- output_mode="narrative" → classic narrative paragraph (model free-form)
-Notes:
-- For "json" and "report_en" modes we prompt the model with a strict JSON schema hint.
-- Age group ("0-15" | "15-65" | "65+") and sex ("male" | "female") are accepted from payload
-  and used only in deterministic narrative rendering (not sent to the model).
 """
 import os
 import re
 import json
 import base64
 import hashlib
 import datetime
 from io import BytesIO
@@ -26,7 +32,7 @@ import torch
 from PIL import Image
 import requests
-# ==== Debug helpers ====
 def _env_bool(name: str, default: bool = False) -> bool:
     v = os.getenv(name)
     if v is None:
@@ -42,7 +48,7 @@ def dbg(*args, **kwargs):
 def warn(*args, **kwargs):
     print("[WARN]", *args, **kwargs)
-# ==== LLaVA & Transformers ====
 try:
     from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
     from llava.conversation import conv_templates, SeparatorStyle
@@ -61,7 +67,7 @@ except Exception as e:
     TRANSFORMERS_AVAILABLE = False
     warn(f"transformers not available: {e}")
-# ==== HF Hub logging (optional) ====
 try:
     from huggingface_hub import HfApi, login
     HF_HUB_AVAILABLE = True
@@ -77,13 +83,12 @@ if HF_HUB_AVAILABLE and "HF_TOKEN" in os.environ:
         repo_name = os.environ.get("LOG_REPO", "")
     except Exception as e:
         warn(f"[HF Hub] init failed: {e}")
-        api = None
-        repo_name = ""
 LOGDIR = "./logs"
 os.makedirs(LOGDIR, exist_ok=True)
-# ==== Global state ====
 tokenizer = None
 model = None
 image_processor = None
@@ -91,7 +96,7 @@ context_len = None
 args = None
 model_initialized = False
-# ==== Prompts ====
 STYLE_HINT = (
     "Write one concise narrative paragraph that covers rhythm, heart rate, cardiac axis, "
     "P waves and PR interval, QRS morphology and duration, ST segments, T waves, and QT/QTc. "
@@ -100,28 +105,26 @@ STYLE_HINT = (
     "followed by a succinct, comma-separated summary of the key diagnoses."
 )
 JSON_SCHEMA_HINT_EN = """
-Return ONLY a valid JSON object that matches EXACTLY this schema:
 {
-  "heart_rate_bpm": int | null,
-  "rhythm": "string",
-  "qrs_axis": "string",
-  "p_waves": "string",
-  "pr_interval_ms": int | null,
-  "qrs_duration_ms": int | null,
-  "t_waves": "string",
-  "qtc_ms": int | null,
-  "qtc_comment": "string",
-  "additional_comments": "string"
 }
-Rules:
-- Output MUST be valid JSON with no extra text before or after.
-- Units: use integers for bpm and ms where applicable.
-- If unknown, use null for numeric fields and empty string for text fields.
-- Use standard cardiology terminology in English.
 """
-# ===================== Utilities =====================
 def _safe_upload(path: str):
     if api and repo_name and path and os.path.isfile(path):
         try:
@@ -139,6 +142,9 @@ def _conv_log_path() -> str:
     return os.path.join(LOGDIR, f"{t.year:04d}-{t.month:02d}-{t.day:02d}-user_conv.json")
 def load_image_any(image_input: Union[str, dict]) -> Image.Image:
     if isinstance(image_input, str):
         s = image_input.strip()
         if s.startswith(("http://", "https://")):
@@ -165,8 +171,111 @@ def _normalize_whitespace(text: str) -> str:
 def _postprocess_min(text: str) -> str:
     return _normalize_whitespace(text)
-# ====== Vision helpers ======
 def get_vision_expected_size(m, default: int = 336) -> int:
     try:
         vt = m.get_vision_tower()
         vt_cfg = getattr(getattr(vt, "vision_tower", vt), "config", None)
@@ -182,30 +291,31 @@ def get_vision_expected_size(m, default: int = 336) -> int:
     return default
 def force_processor_size(proc, size: int):
     try:
         if hasattr(proc, "size"):
             if isinstance(proc.size, dict):
                 proc.size["shortest_edge"] = size
             else:
                 try:
-                    proc.size.shortest_edge = size
                 except Exception:
                     proc.size = {"shortest_edge": size}
         if hasattr(proc, "crop_size"):
             if isinstance(proc.crop_size, dict):
                 proc.crop_size["height"] = size
-                proc.crop_size["width"] = size
             else:
                 try:
-                    proc.crop_size.height = size
-                    proc.crop_size.width = size
                 except Exception:
                     proc.crop_size = {"height": size, "width": size}
         dbg(f"[processor] forced size={size}")
     except Exception as e:
         warn(f"[processor] force size failed: {e}")
-# ====== Stop Criteria ======
 class SafeKeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keyword: str, tokenizer):
         tok = tokenizer(keyword, add_special_tokens=False, return_tensors="pt").input_ids[0]
@@ -220,7 +330,7 @@ class SafeKeywordsStoppingCriteria(StoppingCriteria):
         tail = out[-n:]
         return torch.equal(tail, self.kw_ids.to(tail.device))
-# ===================== Core =====================
 class InferenceDemo:
     def __init__(self, args, model_path, tokenizer_, model_, image_processor_, context_len_):
         if not LLAVA_AVAILABLE:
@@ -260,10 +370,10 @@ def _build_prompt_and_ids(chatbot, user_text: str, device: torch.device):
     ).unsqueeze(0).to(device)
     return prompt, input_ids
-# ===================== Deterministic renderers =====================
 def render_ecg_table_en(d: Dict[str, Any]) -> str:
     lines = ["ECG ANALYSIS", "────────────"]
-    if "heart_rate_bpm" in d and d["heart_rate_bpm"] is not None:
         lines.append(f"Heart rate        : {d['heart_rate_bpm']} beats/min")
     if "rhythm" in d:
         lines.append(f"Rhythm            : {d['rhythm']}")
@@ -271,24 +381,20 @@ def render_ecg_table_en(d: Dict[str, Any]) -> str:
         lines.append(f"QRS axis          : {d['qrs_axis']}")
     if "p_waves" in d:
         lines.append(f"P waves           : {d['p_waves']}")
-    if "pr_interval_ms" in d and d["pr_interval_ms"] is not None:
         lines.append(f"PR interval       : {d['pr_interval_ms']} ms")
-    if "qrs_duration_ms" in d and d["qrs_duration_ms"] is not None:
         lines.append(f"QRS duration      : {d['qrs_duration_ms']} ms")
     if "t_waves" in d:
         lines.append(f"T waves           : {d['t_waves']}")
-    if "qtc_ms" in d and d["qtc_ms"] is not None:
-        qtc_c = d.get("qtc_comment", "").strip()
-        qtc_c = qtc_c if qtc_c else "—"
         lines.append(f"QTc               : {qtc_c} ({d['qtc_ms']} ms)")
-    lines.append("")
-    lines.append("Additional comments")
-    lines.append("──────────────────")
-    lines.append(d.get("additional_comments", "").strip())
     return "\n".join(lines)
 def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
-    """Deterministic narrative based on JSON + age_group + sex"""
     hr = d.get("heart_rate_bpm")
     rhythm = d.get("rhythm")
     axis = d.get("qrs_axis")
@@ -327,9 +433,17 @@ def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
     elif sex:
         para.append(f"The patient is {sex}.")
     if rhythm:
-        para.append(f"The electrocardiogram shows {rhythm.lower()}.")
     if isinstance(hr, int):
         if hr < hr_low:
             hr_comment = "bradycardia"
@@ -339,12 +453,11 @@ def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
             hr_comment = "within normal range"
         para.append(f"The heart rate is {hr} bpm ({hr_comment}).")
     if axis:
         para.append(f"The QRS axis is {axis.lower()}.")
     if p:
         para.append(f"P waves are {p.lower()}.")
     if isinstance(pr, int):
         if pr < pr_low:
             pr_comment = "short PR interval"
@@ -353,17 +466,11 @@ def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
         else:
             pr_comment = "within normal range"
         para.append(f"PR interval is {pr} ms ({pr_comment}).")
     if isinstance(qrs_dur, int):
-        if qrs_dur >= qrs_limit:
-            qrs_comment = "prolonged QRS (possible conduction delay)"
-        else:
-            qrs_comment = "normal QRS duration"
         para.append(f"QRS duration is {qrs_dur} ms ({qrs_comment}).")
     if t:
         para.append(f"T waves: {t}.")
     if isinstance(qtc, int):
         if sex == "male":
             if qtc > qtc_male:
@@ -393,6 +500,7 @@ def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
     paragraph = " ".join(para).strip()
     sci_bits = []
     if rhythm: sci_bits.append(rhythm)
     if axis: sci_bits.append(f"QRS axis: {axis}")
@@ -403,7 +511,7 @@ def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
     return paragraph + "\n\n" + "Structured clinical impression: " + ", ".join(sci_bits)
-# ===================== Generation =====================
 def generate_response(
     message_text: str,
     image_input,
@@ -428,36 +536,28 @@ def generate_response(
     if max_new_tokens is None: max_new_tokens = 4096
     if repetition_penalty is None: repetition_penalty = 1.0
     dbg(f"[gen] temp={temperature} top_p={top_p} max_new={max_new_tokens} rep={repetition_penalty} mode={output_mode}")
     chatbot = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
     if conv_mode_override and conv_mode_override in conv_templates:
         chatbot.conversation = conv_templates[conv_mode_override].copy()
-    # Load image
     try:
         pil_img = load_image_any(image_input)
     except Exception as e:
         return {"error": f"Failed to load image: {e}"}
-    # Save image (log)
-    img_hash, img_path = "NA", None
-    try:
-        buf = BytesIO(); pil_img.save(buf, format="JPEG"); raw = buf.getvalue()
-        img_hash = hashlib.md5(raw).hexdigest()
-        t = datetime.datetime.now()
-        img_path = os.path.join(LOGDIR, "serve_images", f"{t.year:04d}-{t.month:02d}-{t.day:02d}", f"{img_hash}.jpg")
-        os.makedirs(os.path.dirname(img_path), exist_ok=True)
-        if not os.path.isfile(img_path):
-            pil_img.save(img_path)
-    except Exception as e:
-        warn(f"[log] save image failed: {e}")
     device = next(chatbot.model.parameters()).device
     dtype = torch.float16
-    # Preprocess image → tensor
-    expected_size = get_vision_expected_size(chatbot.model, default=336)
     image_tensor = None
     try:
         if hasattr(chatbot.image_processor, "preprocess"):
@@ -471,7 +571,6 @@ def generate_response(
         else:
             raise AttributeError("processor has no preprocess")
     except Exception:
-        # Fallback chain: process_images → manual CLIP norm
         try:
             processed = process_images([pil_img], chatbot.image_processor, chatbot.model.config)
             if isinstance(processed, (list, tuple)) and len(processed) > 0:
@@ -486,6 +585,7 @@ def generate_response(
         except Exception:
             from torchvision import transforms
             from torchvision.transforms import InterpolationMode
             preprocess = transforms.Compose([
                 transforms.Resize(expected_size, interpolation=InterpolationMode.BICUBIC),
                 transforms.CenterCrop(expected_size),
@@ -500,13 +600,14 @@ def generate_response(
     if image_tensor is None:
         return {"error": "Image processing failed (no tensor produced)"}
-    # Prompt selection
     base_msg = (message_text or "").strip()
     if output_mode in ("json", "report_en"):
         msg = f"{base_msg}\n\n{JSON_SCHEMA_HINT_EN}"
-    else:  # narrative
         msg = f"{base_msg}\n\n{STYLE_HINT}"
     _, input_ids = _build_prompt_and_ids(chatbot, msg, device)
     stop_str = chatbot.conversation.sep if chatbot.conversation.sep_style != SeparatorStyle.TWO else chatbot.conversation.sep2
@@ -522,12 +623,13 @@ def generate_response(
         except Exception:
             pass
     streamer = TextIteratorStreamer(chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True)
     gen_kwargs = dict(
         inputs=input_ids,
         images=image_tensor,
         streamer=streamer,
-        do_sample=True,
         temperature=float(temperature),
         top_p=float(top_p),
         max_new_tokens=int(max_new_tokens),
@@ -536,7 +638,6 @@ def generate_response(
         stopping_criteria=[stopping],
     )
-    # Generate
     try:
         t = Thread(target=chatbot.model.generate, kwargs=gen_kwargs)
         t.start()
@@ -548,36 +649,28 @@ def generate_response(
     except Exception as e:
         return {"error": f"Generation failed: {e}"}
-    # Log
-    try:
-        row = {
-            "time": datetime.datetime.now().isoformat(),
-            "type": "chat",
-            "model": "PULSE-7B",
-            "state": [(message_text, text)],
-            "image_hash": img_hash,
-            "image_path": img_path or "",
-        }
-        with open(_conv_log_path(), "a", encoding="utf-8") as f:
-            f.write(json.dumps(row, ensure_ascii=False) + "\n")
-        _safe_upload(_conv_log_path()); _safe_upload(img_path or "")
-    except Exception as e:
-        warn(f"[log] failed: {e}")
-    # Output modes
     if output_mode == "narrative":
         return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
-    # For json & report_en we need to parse JSON once
     try:
         start = text.find("{"); end = text.rfind("}")
         if start == -1 or end == -1 or end <= start:
-            return {"error": "JSON block not found", "raw": text}
         data = json.loads(text[start:end+1])
-    except Exception as e:
-        return {"error": f"JSON parse failed: {e}", "raw": text}
-    # Inject patient metadata (not sent to model; used for deterministic narrative)
     if patient_age_group:
         data["patient_age_group"] = patient_age_group
     if patient_sex:
@@ -598,7 +691,7 @@ def generate_response(
     # Fallback
     return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
-# ===================== Public API =====================
 def query(payload: dict):
     global model_initialized, tokenizer, model, image_processor, context_len, args
     if not model_initialized:
@@ -621,7 +714,7 @@ def query(payload: dict):
         det_seed           = payload.get("det_seed", None)
         output_mode        = payload.get("output_mode", "narrative")
-        # Optional patient meta
         patient_age_group  = payload.get("patient_age_group")
         patient_sex        = payload.get("patient_sex")
@@ -663,7 +756,7 @@ def get_model_info():
         "device": str(next(model.parameters()).device) if model else "Unknown",
     }
-# ===================== Init & Session =====================
 class _Args:
     def __init__(self):
         self.model_path = os.getenv("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
@@ -689,6 +782,7 @@ def initialize_model():
         tokenizer_, model_, image_processor_, context_len_ = load_pretrained_model(
             args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
         )
         try:
             _ = next(model_.parameters()).device
@@ -696,33 +790,37 @@ def initialize_model():
             if torch.cuda.is_available():
                 model_ = model_.to(torch.device("cuda"))
         model_.eval()
         expected_size = get_vision_expected_size(model_, default=336)
-        if image_processor_ is None:
-            try:
-                from transformers import AutoProcessor
-                image_processor_ = AutoProcessor.from_pretrained(args.model_path)
-            except Exception:
-                from transformers import CLIPImageProcessor
-                clip_id = "openai/clip-vit-large-patch14-336" if expected_size >= 336 else "openai/clip-vit-large-patch14"
-                image_processor_ = CLIPImageProcessor.from_pretrained(clip_id)
-        force_processor_size(image_processor_, expected_size)
         globals()["tokenizer"] = tokenizer_
         globals()["model"] = model_
         globals()["image_processor"] = image_processor_
         globals()["context_len"] = context_len_
         chat_manager.init_if_needed(args, args.model_path, tokenizer_, model_, image_processor_, context_len_)
-        print("[init] model/tokenizer/image_processor loaded.]")
         return True
     except Exception as e:
         warn(f"[init] failed: {e}")
         return False
-# ===================== EndpointHandler =====================
 class EndpointHandler:
-    """Hugging Face Endpoint compatible"""
     def __init__(self, model_dir):
         self.model_dir = model_dir
         print(f"EndpointHandler initialized with model_dir: {model_dir}")
@@ -736,9 +834,9 @@ class EndpointHandler:
         return get_model_info()
 if __name__ == "__main__":
-    print("Handler ready (Deterministic JSON→Narrative, age+sex aware). Use `EndpointHandler` or `query`.")
-# ===================== FastAPI Wrapper =====================
 try:
     from fastapi import FastAPI
     from pydantic import BaseModel
@@ -748,7 +846,7 @@ except Exception as e:
     warn(f"fastapi/pydantic not available: {e}")
 if FASTAPI_AVAILABLE:
-    app = FastAPI(title="PULSE ECG Handler API", version="1.2.0")
     class QueryIn(BaseModel):
         message: str | None = None
@@ -801,5 +899,4 @@ if FASTAPI_AVAILABLE:
         data["output_mode"] = "report_en"
         return query(data)
 else:
-    app = None

 # -*- coding: utf-8 -*-
 """
+PULSE ECG Handler — Deterministic JSON → Table + Narrative (age+sex aware) with Robust Fallbacks
+Modes
+- output_mode="json"       → returns structured JSON (single model call)
+- output_mode="report_en"  → returns JSON + table + deterministic narrative (single model call)
+- output_mode="narrative"  → classic free-form model narrative (STYLE_HINT used)
+Highlights
+- Age group ("0-15" | "15-65" | "65+") and sex ("male" | "female") are accepted in payload and are
+  used only in deterministic narrative rendering (not sent to the model).
+- Robust JSON parsing:
+    1) direct JSON slice
+    2) cleanup pseudo-JSON (_coerce_pseudo_json)
+    3) regex-based field extraction from free text (_extract_fields_from_text)
+- Safe stop criteria, dynamic vision-size processor, logging hooks (optional HF Hub upload).
 """
 import os
 import re
 import json
 import base64
+import math
 import hashlib
 import datetime
 from io import BytesIO
 from PIL import Image
 import requests
+# ========= Debug Helpers =========
 def _env_bool(name: str, default: bool = False) -> bool:
     v = os.getenv(name)
     if v is None:
 def warn(*args, **kwargs):
     print("[WARN]", *args, **kwargs)
+# ========= LLaVA & Transformers =========
 try:
     from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
     from llava.conversation import conv_templates, SeparatorStyle
     TRANSFORMERS_AVAILABLE = False
     warn(f"transformers not available: {e}")
+# ========= (Optional) HF Hub logging =========
 try:
     from huggingface_hub import HfApi, login
     HF_HUB_AVAILABLE = True
         repo_name = os.environ.get("LOG_REPO", "")
     except Exception as e:
         warn(f"[HF Hub] init failed: {e}")
+        api, repo_name = None, ""
 LOGDIR = "./logs"
 os.makedirs(LOGDIR, exist_ok=True)
+# ========= Global State =========
 tokenizer = None
 model = None
 image_processor = None
 args = None
 model_initialized = False
+# ========= Prompts =========
 STYLE_HINT = (
     "Write one concise narrative paragraph that covers rhythm, heart rate, cardiac axis, "
     "P waves and PR interval, QRS morphology and duration, ST segments, T waves, and QT/QTc. "
     "followed by a succinct, comma-separated summary of the key diagnoses."
 )
+# Example-only schema (no type hints). The model copies this structure.
 JSON_SCHEMA_HINT_EN = """
+Return ONLY a valid JSON object. Do not include comments, types, or extra text.
+If a value is unknown, use null (for numbers) or "" (for strings).
 {
+  "heart_rate_bpm": 100,
+  "rhythm": "Sinus rhythm",
+  "qrs_axis": "Normal",
+  "p_waves": "Normal",
+  "pr_interval_ms": 160,
+  "qrs_duration_ms": 90,
+  "t_waves": "Normal",
+  "qtc_ms": 420,
+  "qtc_comment": "Normal",
+  "additional_comments": ""
 }
 """
+# ========= Utilities =========
 def _safe_upload(path: str):
     if api and repo_name and path and os.path.isfile(path):
         try:
     return os.path.join(LOGDIR, f"{t.year:04d}-{t.month:02d}-{t.day:02d}-user_conv.json")
 def load_image_any(image_input: Union[str, dict]) -> Image.Image:
+    """
+    Supports: http(s) URL, local path, base64 (with or without data URL prefix), or {"image": <...>}
+    """
     if isinstance(image_input, str):
         s = image_input.strip()
         if s.startswith(("http://", "https://")):
 def _postprocess_min(text: str) -> str:
     return _normalize_whitespace(text)
def _coerce_pseudo_json(text: str) -> str:
    """
    Coerce pseudo-JSON emitted by the model into a string `json.loads` can parse.

    Handles, outside of quoted strings only:
      * type-hint placeholders copied from a schema (``int | null``, ``int | none``,
        ``string | none`` ...) -> ``null`` / ``""``
      * Python tokens ``None`` / ``True`` / ``False`` -> ``null`` / ``true`` / ``false``
      * ``//`` and ``#`` line comments
      * repeated commas and trailing commas before ``}`` / ``]``

    Quoted string values are left byte-for-byte intact, so text such as
    ``"Lead #2"``, ``"http://..."`` or ``"none seen"`` is not corrupted.

    Args:
        text: Raw model output (may contain stray text around the JSON object).

    Returns:
        The cleaned candidate JSON string, or ``""`` when `text` is not a string.
        The result is NOT guaranteed to be valid JSON; callers must still guard
        ``json.loads``.
    """
    if not isinstance(text, str):
        return ""
    s = text

    # Keep only the outermost JSON object if stray tokens are around.
    i, j = s.find("{"), s.rfind("}")
    if i != -1 and j > i:
        s = s[i:j + 1]

    # A double-quoted literal with backslash escapes. It is always the first
    # alternative of every scan so that string contents are skipped untouched.
    string_literal = r'"(?:\\.|[^"\\])*"'

    def sub_outside_strings(pattern, repl, src):
        """Apply `repl` to every match of `pattern` that is not inside a string literal."""
        scanner = re.compile("(" + string_literal + ")|(?:" + pattern + ")", re.I)

        def _replace(m):
            if m.group(1) is not None:  # quoted string: keep verbatim
                return m.group(1)
            return repl(m.group(0))

        return scanner.sub(_replace, src)

    def fix_token(tok):
        """Map one comment / type hint / Python literal to its JSON equivalent."""
        if tok.startswith(("//", "#")):
            return ""
        low = tok.lower()
        if "|" in low:
            # 'string | none' -> empty string; numeric hints -> null
            return '""' if low.startswith("str") else "null"
        return "null" if low == "none" else low  # true / false

    # Pass 1: comments, schema type hints, Python literals.
    s = sub_outside_strings(
        r"//[^\n]*|#[^\n]*"
        r"|\b(?:int(?:eger)?|float|number)\s*\|\s*(?:none|null)\b"
        r"|\bstr(?:ing)?\s*\|\s*(?:none|null)\b"
        r"|\b(?:none|true|false)\b",
        fix_token,
        s,
    )
    # Pass 2: collapse repeated commas (run after comment removal, which can
    # leave commas adjacent to each other).
    s = sub_outside_strings(r",(?:\s*,)+", lambda _tok: ",", s)
    # Pass 3: drop a trailing comma before a closing brace/bracket.
    s = sub_outside_strings(r",(?=\s*[}\]])", lambda _tok: "", s)
    return s.strip()
def _to_int_or_none(x: Optional[Union[str, int, float]]) -> Optional[int]:
    """
    Convert a numeric-looking value to ``int`` (truncating any fraction).

    Args:
        x: A string such as ``"72"`` / ``" 59.9 "``, or an ``int`` / ``float``.

    Returns:
        The truncated integer, or ``None`` for ``None``, booleans, blank strings,
        non-numeric input, and non-finite values (NaN / infinity).
    """
    # bool is an int subclass; a JSON true/false is never a valid measurement.
    if x is None or isinstance(x, bool):
        return None
    if isinstance(x, str):
        x = x.strip()
        if not x:
            return None
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        return None
    # The finiteness check must run on the float: int() raises on NaN/inf.
    if not math.isfinite(value):
        return None
    return int(value)


def _extract_fields_from_text(text: str) -> Dict[str, Any]:
    """
    Extract ECG fields from free text when the model failed to return valid JSON.

    For every field the schema key is tried first (so truncated or otherwise
    broken JSON such as ``"heart_rate_bpm": 100, "rhy`` is still recovered),
    then human-readable labels ("Heart rate: 72", "PR interval is 160 ms"), then
    field-specific fallbacks.

    Args:
        text: Raw model output; non-string input is stringified (``None`` -> ``""``).

    Returns:
        A dict with all ten schema keys. Missing numeric fields are ``None``,
        missing text fields are ``""``.
    """
    if not isinstance(text, str):
        text = str(text or "")

    def rex(pattern, flags=re.I):
        m = re.search(pattern, text, flags)
        return m.group(1).strip() if m else None

    def first(*patterns):
        """Return the first non-empty capture among `patterns`, else None."""
        for pattern in patterns:
            value = rex(pattern)
            if value:
                return value
        return None

    def key_num(key):
        # "key": 123   /   key = "123"   /   "key": 59.5
        return r'"?\b' + key + r'"?\s*[:=]\s*"?(\d{1,4}(?:\.\d+)?)'

    def key_str(key):
        # "key": "value" (value must be a closed quoted string on one line)
        return r'"?\b' + key + r'"?\s*[:=]\s*"([^"\r\n]*)"'

    # Optional separator between a label and its value: ':' / '=' or a copula.
    # Consuming the copula keeps it out of the captured value, so the narrative
    # renderer does not produce "P waves are are normal".
    sep = r"(?:\s*[:=]|\s+(?:is|are|of|was|appear)\b)?\s*"

    # bpm
    hr = first(
        key_num("heart_rate_bpm"),
        r"\b(?:heart\s*rate|hr)" + sep + r"(\d{1,3})",
        r"\b(\d{2,3})\s*(?:bpm|beats?\s*(?:/|per)\s*min(?:ute)?)\b",
    )

    # PR/QRS/QTc ms
    pr = first(key_num("pr_interval_ms"), r"\bPR\s*(?:interval)?" + sep + r"(\d{2,4})\s*ms\b")
    qrs = first(key_num("qrs_duration_ms"), r"\bQRS\s*(?:duration)?" + sep + r"(\d{2,4})\s*ms\b")
    qtc = first(key_num("qtc_ms"), r"\bQTc?\s*(?:interval)?" + sep + r"(\d{2,4})\s*ms\b")

    # Axis. The word boundary applies only to the keyword alternatives: a
    # boundary after the degree sign would never match before a space or
    # punctuation.
    axis = first(
        key_str("qrs_axis"),
        r"\bQRS\s*axis" + sep
        + r"([+\-]?\d+\s*(?:°|deg(?:rees?)?\b)|(?:normal|left|right|indeterminate)\b)",
    )

    # Rhythm. The label form requires an explicit ':' / '=' so that narrative
    # text like "sinus rhythm with a heart rate of ..." falls through to the
    # known-rhythm vocabulary instead of capturing "with a heart rate of".
    rhythm = first(
        key_str("rhythm"),
        r"\brhythm\s*[:=]\s*([A-Za-z \-]+)",
        r"\b(sinus\s+(?:tachycardia|bradycardia|rhythm)|atrial fibrillation|afib|atrial flutter|junctional rhythm)\b",
    )

    # P / T waves
    p_waves = first(key_str("p_waves"), r"\bP\s*waves?" + sep + r"([A-Za-z0-9, \-]+)")
    t_waves = first(key_str("t_waves"), r"\bT\s*waves?" + sep + r"([A-Za-z0-9, \-]+)")

    # QTc comment
    qtc_comment = first(
        key_str("qtc_comment"),
        r"\bQTc\s*(?:comment|status)?\s*[:=]?\s*([A-Za-z \-]+)",
    )

    # Additional comments; the last pattern keeps the finding together with
    # the rest of its line (up to 120 chars) rather than the bare keyword.
    additional = first(
        key_str("additional_comments"),
        r"\b(?:Additional\s*comments|Notes?)\b\s*[:\-]?\s*([\s\S]{0,300})",
        r"\b((?:ST[- ](?:elevation|depression)|S1Q3T3|early repolarization|strain pattern)\b[^\n\r]{0,120})",
    )

    return {
        "heart_rate_bpm": _to_int_or_none(hr),
        "rhythm": (rhythm or "").strip(),
        "qrs_axis": (axis or "").strip(),
        "p_waves": (p_waves or "").strip(),
        "pr_interval_ms": _to_int_or_none(pr),
        "qrs_duration_ms": _to_int_or_none(qrs),
        "t_waves": (t_waves or "").strip(),
        "qtc_ms": _to_int_or_none(qtc),
        "qtc_comment": (qtc_comment or "").strip(),
        "additional_comments": (additional or "").strip(),
    }
+# ========= Vision helpers =========
 def get_vision_expected_size(m, default: int = 336) -> int:
+    """
+    Return expected image size for the model vision tower if available.
+    """
     try:
         vt = m.get_vision_tower()
         vt_cfg = getattr(getattr(vt, "vision_tower", vt), "config", None)
     return default
 def force_processor_size(proc, size: int):
+    """Force processor resize/crop to target size safely."""
     try:
         if hasattr(proc, "size"):
             if isinstance(proc.size, dict):
                 proc.size["shortest_edge"] = size
             else:
                 try:
+                    proc.size.shortest_edge = size  # type: ignore[attr-defined]
                 except Exception:
                     proc.size = {"shortest_edge": size}
         if hasattr(proc, "crop_size"):
             if isinstance(proc.crop_size, dict):
                 proc.crop_size["height"] = size
+                proc.crop_size["width"]  = size
             else:
                 try:
+                    proc.crop_size.height = size  # type: ignore[attr-defined]
+                    proc.crop_size.width  = size  # type: ignore[attr-defined]
                 except Exception:
                     proc.crop_size = {"height": size, "width": size}
         dbg(f"[processor] forced size={size}")
     except Exception as e:
         warn(f"[processor] force size failed: {e}")
+# ========= Safe Stopper =========
 class SafeKeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keyword: str, tokenizer):
         tok = tokenizer(keyword, add_special_tokens=False, return_tensors="pt").input_ids[0]
         tail = out[-n:]
         return torch.equal(tail, self.kw_ids.to(tail.device))
+# ========= Core Session =========
 class InferenceDemo:
     def __init__(self, args, model_path, tokenizer_, model_, image_processor_, context_len_):
         if not LLAVA_AVAILABLE:
     ).unsqueeze(0).to(device)
     return prompt, input_ids
+# ========= Deterministic Renderers =========
 def render_ecg_table_en(d: Dict[str, Any]) -> str:
     lines = ["ECG ANALYSIS", "────────────"]
+    if d.get("heart_rate_bpm") is not None:
         lines.append(f"Heart rate        : {d['heart_rate_bpm']} beats/min")
     if "rhythm" in d:
         lines.append(f"Rhythm            : {d['rhythm']}")
         lines.append(f"QRS axis          : {d['qrs_axis']}")
     if "p_waves" in d:
         lines.append(f"P waves           : {d['p_waves']}")
+    if d.get("pr_interval_ms") is not None:
         lines.append(f"PR interval       : {d['pr_interval_ms']} ms")
+    if d.get("qrs_duration_ms") is not None:
         lines.append(f"QRS duration      : {d['qrs_duration_ms']} ms")
     if "t_waves" in d:
         lines.append(f"T waves           : {d['t_waves']}")
+    if d.get("qtc_ms") is not None:
+        qtc_c = (d.get("qtc_comment") or "").strip() or "—"
         lines.append(f"QTc               : {qtc_c} ({d['qtc_ms']} ms)")
+    lines += ["", "Additional comments", "──────────────────", (d.get("additional_comments") or "").strip()]
     return "\n".join(lines)
 def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
+    """Deterministic narrative based on JSON + age_group + sex with 'Structured clinical impression' at the end."""
     hr = d.get("heart_rate_bpm")
     rhythm = d.get("rhythm")
     axis = d.get("qrs_axis")
     elif sex:
         para.append(f"The patient is {sex}.")
+    # Rhythm with age-adjusted normalization for sinus tachycardia
     if rhythm:
+        if rhythm.lower() == "sinus tachycardia" and isinstance(hr, int) and hr_low <= hr <= hr_high:
+            para.append(
+                f"The electrocardiogram shows sinus rhythm, normal for age. "
+                f"Although labelled as sinus tachycardia, the heart rate of {hr} bpm is within the normal range for this age group."
+            )
+        else:
+            para.append(f"The electrocardiogram shows {rhythm.lower()}.")
+    # Heart rate comment
     if isinstance(hr, int):
         if hr < hr_low:
             hr_comment = "bradycardia"
             hr_comment = "within normal range"
         para.append(f"The heart rate is {hr} bpm ({hr_comment}).")
+    # Axis / P / PR / QRS / T / QTc
     if axis:
         para.append(f"The QRS axis is {axis.lower()}.")
     if p:
         para.append(f"P waves are {p.lower()}.")
     if isinstance(pr, int):
         if pr < pr_low:
             pr_comment = "short PR interval"
         else:
             pr_comment = "within normal range"
         para.append(f"PR interval is {pr} ms ({pr_comment}).")
     if isinstance(qrs_dur, int):
+        qrs_comment = "normal QRS duration" if qrs_dur < qrs_limit else "prolonged QRS (possible conduction delay)"
         para.append(f"QRS duration is {qrs_dur} ms ({qrs_comment}).")
     if t:
         para.append(f"T waves: {t}.")
     if isinstance(qtc, int):
         if sex == "male":
             if qtc > qtc_male:
     paragraph = " ".join(para).strip()
+    # Structured clinical impression (deterministic summary)
     sci_bits = []
     if rhythm: sci_bits.append(rhythm)
     if axis: sci_bits.append(f"QRS axis: {axis}")
     return paragraph + "\n\n" + "Structured clinical impression: " + ", ".join(sci_bits)
+# ========= Generation =========
 def generate_response(
     message_text: str,
     image_input,
     if max_new_tokens is None: max_new_tokens = 4096
     if repetition_penalty is None: repetition_penalty = 1.0
+    # Deterministic settings for schema modes
+    if output_mode in ("json", "report_en"):
+        temperature = 0.0
+        top_p = 1.0
+        repetition_penalty = 1.0
+        max_new_tokens = min(int(max_new_tokens), 1024)
     dbg(f"[gen] temp={temperature} top_p={top_p} max_new={max_new_tokens} rep={repetition_penalty} mode={output_mode}")
     chatbot = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
     if conv_mode_override and conv_mode_override in conv_templates:
         chatbot.conversation = conv_templates[conv_mode_override].copy()
+    # Load image → tensor
     try:
         pil_img = load_image_any(image_input)
     except Exception as e:
         return {"error": f"Failed to load image: {e}"}
     device = next(chatbot.model.parameters()).device
     dtype = torch.float16
     image_tensor = None
     try:
         if hasattr(chatbot.image_processor, "preprocess"):
         else:
             raise AttributeError("processor has no preprocess")
     except Exception:
         try:
             processed = process_images([pil_img], chatbot.image_processor, chatbot.model.config)
             if isinstance(processed, (list, tuple)) and len(processed) > 0:
         except Exception:
             from torchvision import transforms
             from torchvision.transforms import InterpolationMode
+            expected_size = get_vision_expected_size(chatbot.model, default=336)
             preprocess = transforms.Compose([
                 transforms.Resize(expected_size, interpolation=InterpolationMode.BICUBIC),
                 transforms.CenterCrop(expected_size),
     if image_tensor is None:
         return {"error": "Image processing failed (no tensor produced)"}
+    # Build prompt
     base_msg = (message_text or "").strip()
     if output_mode in ("json", "report_en"):
         msg = f"{base_msg}\n\n{JSON_SCHEMA_HINT_EN}"
+    else:  # "narrative"
         msg = f"{base_msg}\n\n{STYLE_HINT}"
+    dbg(f"[prompt] mode={output_mode}")
     _, input_ids = _build_prompt_and_ids(chatbot, msg, device)
     stop_str = chatbot.conversation.sep if chatbot.conversation.sep_style != SeparatorStyle.TWO else chatbot.conversation.sep2
         except Exception:
             pass
+    # Generate with streamer
     streamer = TextIteratorStreamer(chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True)
     gen_kwargs = dict(
         inputs=input_ids,
         images=image_tensor,
         streamer=streamer,
+        do_sample=(temperature > 0.0),
         temperature=float(temperature),
         top_p=float(top_p),
         max_new_tokens=int(max_new_tokens),
         stopping_criteria=[stopping],
     )
     try:
         t = Thread(target=chatbot.model.generate, kwargs=gen_kwargs)
         t.start()
     except Exception as e:
         return {"error": f"Generation failed: {e}"}
+    # output_mode handlers
     if output_mode == "narrative":
         return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
+    # For json & report_en → parse once, with robust fallbacks
     try:
         start = text.find("{"); end = text.rfind("}")
         if start == -1 or end == -1 or end <= start:
+            raise ValueError("JSON braces not found")
         data = json.loads(text[start:end+1])
+        data["_parse_mode"] = "direct"
+    except Exception:
+        cleaned = _coerce_pseudo_json(text)
+        try:
+            data = json.loads(cleaned)
+            data["_parse_mode"] = "cleaned"
+        except Exception:
+            # Last resort: extract with regex from free text
+            data = _extract_fields_from_text(text)
+            data["_parse_mode"] = "extracted"
+    # Inject patient meta (local only)
     if patient_age_group:
         data["patient_age_group"] = patient_age_group
     if patient_sex:
     # Fallback
     return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
+# ========= Public API =========
 def query(payload: dict):
     # Entry point taking the request as a plain dict.
     # Payload keys read in the lines below: "det_seed", "output_mode",
     # "patient_age_group" and "patient_sex"; all are optional lookups via .get().
     # NOTE(review): the module-level model handles are declared global,
     # presumably so lazy initialization can rebind them — confirm against the
     # full function body.
     global model_initialized, tokenizer, model, image_processor, context_len, args
     if not model_initialized:
         # None when the caller does not supply "det_seed".
         det_seed           = payload.get("det_seed", None)
         # Falls back to "narrative" when "output_mode" is absent.
         output_mode        = payload.get("output_mode", "narrative")
+        # Optional patient meta (local use only)
         patient_age_group  = payload.get("patient_age_group")
         patient_sex        = payload.get("patient_sex")
         "device": str(next(model.parameters()).device) if model else "Unknown",
     }
+# ========= Init & Session =========
 class _Args:
     # Holder for the settings used when loading the model; values are
     # resolved once, at construction time.
     def __init__(self):
         # The model id/path comes from the HF_MODEL_ID environment variable
         # and defaults to "PULSE-ECG/PULSE-7B" when that variable is unset.
         self.model_path = os.getenv("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
         tokenizer_, model_, image_processor_, context_len_ = load_pretrained_model(
             args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
         )
+        dbg(f"[init] loaded model/tokenizer/processor | context_len={context_len_}")
         try:
             _ = next(model_.parameters()).device
             if torch.cuda.is_available():
                 model_ = model_.to(torch.device("cuda"))
         model_.eval()
+        dbg(f"[init] device={next(model_.parameters()).device}, cuda={torch.cuda.is_available()}")
         expected_size = get_vision_expected_size(model_, default=336)
+        try:
+            if image_processor_ is None:
+                from transformers import AutoProcessor, CLIPImageProcessor
+                try:
+                    image_processor_ = AutoProcessor.from_pretrained(args.model_path)
+                except Exception:
+                    clip_id = "openai/clip-vit-large-patch14-336" if expected_size >= 336 else "openai/clip-vit-large-patch14"
+                    image_processor_ = CLIPImageProcessor.from_pretrained(clip_id)
+            force_processor_size(image_processor_, expected_size)
+        except Exception as e_ip:
+            warn(f"[init] image_processor fallback/size set failed: {e_ip}")
+        # publish
         globals()["tokenizer"] = tokenizer_
         globals()["model"] = model_
         globals()["image_processor"] = image_processor_
         globals()["context_len"] = context_len_
         chat_manager.init_if_needed(args, args.model_path, tokenizer_, model_, image_processor_, context_len_)
+        print("[init] model/tokenizer/image_processor loaded.")
         return True
     except Exception as e:
         warn(f"[init] failed: {e}")
         return False
+# ========= HF EndpointHandler =========
 class EndpointHandler:
+    """Hugging Face Endpoint compatible."""
     def __init__(self, model_dir):
         # Store the model directory passed in by the caller and log it.
         # No model loading happens in the lines visible here.
         self.model_dir = model_dir
         print(f"EndpointHandler initialized with model_dir: {model_dir}")
         return get_model_info()
 if __name__ == "__main__":
     # Running the module directly only prints a readiness banner; the banner
     # itself points callers at `EndpointHandler` or `query` for real use.
+    print("Handler ready (Deterministic JSON→Narrative with robust fallbacks, age+sex aware). Use `EndpointHandler` or `query`.")
+# ========= Optional FastAPI Wrapper =========
 try:
     from fastapi import FastAPI
     from pydantic import BaseModel
     warn(f"fastapi/pydantic not available: {e}")
 if FASTAPI_AVAILABLE:
     # Build the ASGI application only when the availability flag is set.
     # NOTE(review): the flag is presumably assigned by the guarded
     # fastapi/pydantic import above — confirm.
+    app = FastAPI(title="PULSE ECG Handler API", version="1.4.0")
     class QueryIn(BaseModel):
         # Request body model; `message` is optional and defaults to None.
         message: str | None = None
         data["output_mode"] = "report_en"
         return query(data)
 else:
     # Keep the module-level name `app` defined even without FastAPI.
+    app = None  # uvicorn handler:app would fail if FastAPI is not installed