Update handler.py
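
Reworks the report_en flow: instead of two model calls (one for strict JSON, a second for a free-form narrative), the handler now makes a single model call that returns strict JSON and derives both the table text and the narrative deterministically from it. Optional patient_age_group ("0-15" | "15-65" | "65+") and patient_sex ("male" | "female") payload fields select age- and sex-specific thresholds for heart rate, PR, QRS duration, and QTc in the deterministic narrative; they are never sent to the model. Also fixes force_processor_size (crop width/height were never assigned) and SafeKeywordsStoppingCriteria (the token tail was compared against an undefined name), removes the /debug endpoint and most dbg logging, simplifies the image-processor fallback in initialize_model, and bumps the API version to 1.2.0.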
handler.py  CHANGED  (+254 −268)
@@ -1,21 +1,15 @@
 # -*- coding: utf-8 -*-
 """
-PULSE ECG Handler —
-
-
-
-
-
-
-
-
-
-* DEBUG helpers (ENV: DEBUG=1)
-* Dynamic vision size (vision tower -> processor + preprocess/fallback)
-* image_processor fallback (AutoProcessor → CLIPImageProcessor)
-* process_images fallback (torchvision + CLIP norm)
-* FastAPI wrapper: /health, /info, /query, /debug, /analyze/json, /analyze/report-en
-* JSON schema (EN) and report renderer (table text + narrative)
+PULSE ECG Handler — Deterministic JSON→Narrative (age+sex aware)
+- Model still processes image (LLaVA/transformers)
+- output_mode="json" → returns structured JSON (single model call)
+- output_mode="report_en" → JSON + table + narrative (derived deterministically from JSON; still single model call)
+- output_mode="narrative" → classic narrative paragraph (model free-form)
+
+Notes:
+- For "json" and "report_en" modes we prompt the model with a strict JSON schema hint.
+- Age group ("0-15" | "15-65" | "65+") and sex ("male" | "female") are accepted from payload
+  and used only in deterministic narrative rendering (not sent to the model).
 """
 
 import os
@@ -32,7 +26,7 @@ import torch
 from PIL import Image
 import requests
 
-#
+# ==== Debug helpers ====
 def _env_bool(name: str, default: bool = False) -> bool:
     v = os.getenv(name)
     if v is None:
@@ -48,7 +42,7 @@ def dbg(*args, **kwargs):
 def warn(*args, **kwargs):
     print("[WARN]", *args, **kwargs)
 
-#
+# ==== LLaVA & Transformers ====
 try:
     from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
     from llava.conversation import conv_templates, SeparatorStyle
@@ -67,7 +61,7 @@ except Exception as e:
     TRANSFORMERS_AVAILABLE = False
     warn(f"transformers not available: {e}")
 
-#
+# ==== HF Hub logging (optional) ====
 try:
     from huggingface_hub import HfApi, login
     HF_HUB_AVAILABLE = True
@@ -89,7 +83,7 @@ if HF_HUB_AVAILABLE and "HF_TOKEN" in os.environ:
 LOGDIR = "./logs"
 os.makedirs(LOGDIR, exist_ok=True)
 
-#
+# ==== Global state ====
 tokenizer = None
 model = None
 image_processor = None
@@ -97,7 +91,7 @@ context_len = None
 args = None
 model_initialized = False
 
-#
+# ==== Prompts ====
 STYLE_HINT = (
     "Write one concise narrative paragraph that covers rhythm, heart rate, cardiac axis, "
     "P waves and PR interval, QRS morphology and duration, ST segments, T waves, and QT/QTc. "
@@ -106,27 +100,24 @@ STYLE_HINT = (
     "followed by a succinct, comma-separated summary of the key diagnoses."
 )
 
-# ====== JSON Schema (EN) for strict machine-readable output ======
 JSON_SCHEMA_HINT_EN = """
 Return ONLY a valid JSON object that matches EXACTLY this schema:
-
 {
-  "heart_rate_bpm": int
-  "rhythm": "string",
-  "qrs_axis": "string",
-  "p_waves": "string",
-  "pr_interval_ms": int
-  "qrs_duration_ms": int
-  "t_waves": "string",
-  "qtc_ms": int
-  "qtc_comment": "string",
-  "additional_comments": "string"
+  "heart_rate_bpm": int | null,
+  "rhythm": "string",
+  "qrs_axis": "string",
+  "p_waves": "string",
+  "pr_interval_ms": int | null,
+  "qrs_duration_ms": int | null,
+  "t_waves": "string",
+  "qtc_ms": int | null,
+  "qtc_comment": "string",
+  "additional_comments": "string"
 }
-
 Rules:
 - Output MUST be valid JSON with no extra text before or after.
-- Units: use
-- If unknown, use null
+- Units: use integers for bpm and ms where applicable.
+- If unknown, use null for numeric fields and empty string for text fields.
 - Use standard cardiology terminology in English.
 """
 
@@ -148,13 +139,6 @@ def _conv_log_path() -> str:
     return os.path.join(LOGDIR, f"{t.year:04d}-{t.month:02d}-{t.day:02d}-user_conv.json")
 
 def load_image_any(image_input: Union[str, dict]) -> Image.Image:
-    """
-    Supported:
-    - URL (http/https)
-    - local file path
-    - base64 (optionally with data URL prefix)
-    - {"image": <base64|dataurl>}
-    """
     if isinstance(image_input, str):
         s = image_input.strip()
         if s.startswith(("http://", "https://")):
@@ -163,15 +147,12 @@ def load_image_any(image_input: Union[str, dict]) -> Image.Image:
             return Image.open(BytesIO(r.content)).convert("RGB")
         if os.path.exists(s):
             return Image.open(s).convert("RGB")
-        # base64 (maybe dataurl)
         if s.startswith("data:image"):
             s = s.split(",", 1)[1]
         raw = base64.b64decode(s)
         return Image.open(BytesIO(raw)).convert("RGB")
-
     if isinstance(image_input, dict) and "image" in image_input:
         return load_image_any(image_input["image"])
-
     raise ValueError("Unsupported image input format")
 
 def _normalize_whitespace(text: str) -> str:
@@ -184,11 +165,8 @@ def _normalize_whitespace(text: str) -> str:
 def _postprocess_min(text: str) -> str:
     return _normalize_whitespace(text)
 
-# ====== Vision helpers
+# ====== Vision helpers ======
 def get_vision_expected_size(m, default: int = 336) -> int:
-    """
-    Returns expected image size for the model's vision tower (e.g., 336).
-    """
     try:
         vt = m.get_vision_tower()
         vt_cfg = getattr(getattr(vt, "vision_tower", vt), "config", None)
@@ -204,51 +182,45 @@ def get_vision_expected_size(m, default: int = 336) -> int:
     return default
 
 def force_processor_size(proc, size: int):
-    """Force processor resize/crop to target size safely."""
     try:
-        # size
         if hasattr(proc, "size"):
             if isinstance(proc.size, dict):
                 proc.size["shortest_edge"] = size
             else:
                 try:
-                    proc.size.shortest_edge = size
+                    proc.size.shortest_edge = size
                 except Exception:
                     proc.size = {"shortest_edge": size}
-        # crop_size
         if hasattr(proc, "crop_size"):
             if isinstance(proc.crop_size, dict):
                 proc.crop_size["height"] = size
-                proc.crop_size["width"]
+                proc.crop_size["width"] = size
             else:
                 try:
-                    proc.crop_size.height = size
-                    proc.crop_size.width
+                    proc.crop_size.height = size
+                    proc.crop_size.width = size
                 except Exception:
                     proc.crop_size = {"height": size, "width": size}
         dbg(f"[processor] forced size={size}")
     except Exception as e:
         warn(f"[processor] force size failed: {e}")
 
-# ======
+# ====== Stop Criteria ======
 class SafeKeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keyword: str, tokenizer):
-        self.tokenizer = tokenizer
         tok = tokenizer(keyword, add_special_tokens=False, return_tensors="pt").input_ids[0]
-        self.kw_ids = tok
-
+        self.kw_ids = tok
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
         if input_ids is None or input_ids.shape[0] == 0:
             return False
-        out = input_ids[0]
+        out = input_ids[0]
         n = self.kw_ids.shape[0]
         if out.shape[0] < n:
             return False
        tail = out[-n:]
-
-        return torch.equal(tail, kw)
+        return torch.equal(tail, self.kw_ids.to(tail.device))
 
-# ===================== Core
+# ===================== Core =====================
 class InferenceDemo:
     def __init__(self, args, model_path, tokenizer_, model_, image_processor_, context_len_):
         if not LLAVA_AVAILABLE:
@@ -288,6 +260,150 @@ def _build_prompt_and_ids(chatbot, user_text: str, device: torch.device):
     ).unsqueeze(0).to(device)
     return prompt, input_ids
 
+# ===================== Deterministic renderers =====================
+def render_ecg_table_en(d: Dict[str, Any]) -> str:
+    lines = ["ECG ANALYSIS", "────────────"]
+    if "heart_rate_bpm" in d and d["heart_rate_bpm"] is not None:
+        lines.append(f"Heart rate : {d['heart_rate_bpm']} beats/min")
+    if "rhythm" in d:
+        lines.append(f"Rhythm : {d['rhythm']}")
+    if "qrs_axis" in d:
+        lines.append(f"QRS axis : {d['qrs_axis']}")
+    if "p_waves" in d:
+        lines.append(f"P waves : {d['p_waves']}")
+    if "pr_interval_ms" in d and d["pr_interval_ms"] is not None:
+        lines.append(f"PR interval : {d['pr_interval_ms']} ms")
+    if "qrs_duration_ms" in d and d["qrs_duration_ms"] is not None:
+        lines.append(f"QRS duration : {d['qrs_duration_ms']} ms")
+    if "t_waves" in d:
+        lines.append(f"T waves : {d['t_waves']}")
+    if "qtc_ms" in d and d["qtc_ms"] is not None:
+        qtc_c = d.get("qtc_comment", "").strip()
+        qtc_c = qtc_c if qtc_c else "—"
+        lines.append(f"QTc : {qtc_c} ({d['qtc_ms']} ms)")
+    lines.append("")
+    lines.append("Additional comments")
+    lines.append("──────────────────")
+    lines.append(d.get("additional_comments", "").strip())
+    return "\n".join(lines)
+
+def render_ecg_narrative_en(d: Dict[str, Any]) -> str:
+    """Deterministic narrative based on JSON + age_group + sex"""
+    hr = d.get("heart_rate_bpm")
+    rhythm = d.get("rhythm")
+    axis = d.get("qrs_axis")
+    p = d.get("p_waves")
+    pr = d.get("pr_interval_ms")
+    qrs_dur = d.get("qrs_duration_ms")
+    t = d.get("t_waves")
+    qtc = d.get("qtc_ms")
+    extra = d.get("additional_comments")
+    age_group = d.get("patient_age_group")  # "0-15" | "15-65" | "65+"
+    sex = d.get("patient_sex")  # "male" | "female"
+
+    # thresholds by age group
+    if age_group == "0-15":
+        hr_low, hr_high = 70, 120
+        pr_low, pr_high = 110, 180
+        qrs_limit = 100
+        qtc_male, qtc_female = 460, 470
+    elif age_group == "65+":
+        hr_low, hr_high = 50, 100
+        pr_low, pr_high = 120, 220
+        qrs_limit = 120
+        qtc_male, qtc_female = 460, 480
+    else:  # default 15-65
+        hr_low, hr_high = 60, 100
+        pr_low, pr_high = 120, 200
+        qrs_limit = 120
+        qtc_male, qtc_female = 450, 470
+
+    para = []
+    # patient context
+    if age_group and sex:
+        para.append(f"The patient is a {age_group} years {sex}.")
+    elif age_group:
+        para.append(f"The patient belongs to the {age_group} years age group.")
+    elif sex:
+        para.append(f"The patient is {sex}.")
+
+    if rhythm:
+        para.append(f"The electrocardiogram shows {rhythm.lower()}.")
+
+    if isinstance(hr, int):
+        if hr < hr_low:
+            hr_comment = "bradycardia"
+        elif hr > hr_high:
+            hr_comment = "tachycardia"
+        else:
+            hr_comment = "within normal range"
+        para.append(f"The heart rate is {hr} bpm ({hr_comment}).")
+
+    if axis:
+        para.append(f"The QRS axis is {axis.lower()}.")
+
+    if p:
+        para.append(f"P waves are {p.lower()}.")
+
+    if isinstance(pr, int):
+        if pr < pr_low:
+            pr_comment = "short PR interval"
+        elif pr > pr_high:
+            pr_comment = "prolonged PR interval"
+        else:
+            pr_comment = "within normal range"
+        para.append(f"PR interval is {pr} ms ({pr_comment}).")
+
+    if isinstance(qrs_dur, int):
+        if qrs_dur >= qrs_limit:
+            qrs_comment = "prolonged QRS (possible conduction delay)"
+        else:
+            qrs_comment = "normal QRS duration"
+        para.append(f"QRS duration is {qrs_dur} ms ({qrs_comment}).")
+
+    if t:
+        para.append(f"T waves: {t}.")
+
+    if isinstance(qtc, int):
+        if sex == "male":
+            if qtc > qtc_male:
+                qtc_comment = "prolonged for male"
+            elif qtc < 350:
+                qtc_comment = "shortened"
+            else:
+                qtc_comment = "normal for male"
+        elif sex == "female":
+            if qtc > qtc_female:
+                qtc_comment = "prolonged for female"
+            elif qtc < 360:
+                qtc_comment = "shortened"
+            else:
+                qtc_comment = "normal for female"
+        else:
+            if qtc > max(qtc_male, qtc_female):
+                qtc_comment = "prolonged"
+            elif qtc < 350:
+                qtc_comment = "shortened"
+            else:
+                qtc_comment = "normal"
+        para.append(f"QTc is {qtc} ms ({qtc_comment}).")
+
+    if isinstance(extra, str) and extra.strip():
+        para.append(extra.strip())
+
+    paragraph = " ".join(para).strip()
+
+    sci_bits = []
+    if rhythm: sci_bits.append(rhythm)
+    if axis: sci_bits.append(f"QRS axis: {axis}")
+    if isinstance(pr, int): sci_bits.append(f"PR {pr} ms")
+    if isinstance(qrs_dur, int): sci_bits.append(f"QRS {qrs_dur} ms")
+    if isinstance(qtc, int): sci_bits.append(f"QTc {qtc} ms")
+    if isinstance(extra, str) and extra.strip(): sci_bits.append(extra.strip())
+
+    return paragraph + "\n\n" + "Structured clinical impression: " + ", ".join(sci_bits)
+
+# ===================== Generation =====================
 def generate_response(
     message_text: str,
     image_input,
@@ -298,7 +414,9 @@ def generate_response(
     conv_mode_override: Optional[str] = None,
     repetition_penalty: Optional[float] = None,
     det_seed: Optional[int] = None,
-    output_mode:
+    output_mode: str = "narrative",  # "narrative" | "json" | "report_en"
+    patient_age_group: Optional[str] = None,
+    patient_sex: Optional[str] = None,
 ):
     if not (LLAVA_AVAILABLE and TRANSFORMERS_AVAILABLE):
         return {"error": "Required libraries not available (llava/transformers)"}
@@ -310,63 +428,19 @@
     if max_new_tokens is None: max_new_tokens = 4096
     if repetition_penalty is None: repetition_penalty = 1.0
 
-    dbg(f"[gen]
-
-    if output_mode == "report_en":
-        # Ensure a session exists so we can safely expose a conversation_id
-        try:
-            _cb = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
-            conv_id = id(_cb.conversation)
-        except Exception:
-            conv_id = None
-
-        # 1) Produce strict JSON (machine-readable)
-        first = generate_response(
-            message_text=message_text,
-            image_input=image_input,
-            temperature=temperature, top_p=top_p,
-            max_new_tokens=max_new_tokens,
-            conv_mode_override=conv_mode_override,
-            repetition_penalty=repetition_penalty,
-            det_seed=det_seed,
-            output_mode="json",
-        )
-        if not isinstance(first, dict) or "response" not in first or not isinstance(first["response"], dict):
-            return first
-        data = first["response"]
-
-        # 2) Produce short narrative (human-readable)
-        second = generate_response(
-            message_text=message_text,
-            image_input=image_input,
-            temperature=temperature, top_p=top_p,
-            max_new_tokens=min(int(max_new_tokens), 512),
-            conv_mode_override=conv_mode_override,
-            repetition_penalty=repetition_penalty,
-            det_seed=det_seed,
-            output_mode="narrative",
-        )
-        narrative = second.get("response") if isinstance(second, dict) else None
-
-        table_txt = render_ecg_table_en(data)
-        return {
-            "status": "success",
-            "report": {"table_text": table_txt, "json": data, "narrative": narrative},
-            "conversation_id": conv_id
-        }
-
+    dbg(f"[gen] temp={temperature} top_p={top_p} max_new={max_new_tokens} rep={repetition_penalty} mode={output_mode}")
 
     chatbot = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
     if conv_mode_override and conv_mode_override in conv_templates:
         chatbot.conversation = conv_templates[conv_mode_override].copy()
 
-    # Load image
+    # Load image
     try:
         pil_img = load_image_any(image_input)
     except Exception as e:
         return {"error": f"Failed to load image: {e}"}
 
-    # Save image log
+    # Save image (log)
     img_hash, img_path = "NA", None
     try:
         buf = BytesIO(); pil_img.save(buf, format="JPEG"); raw = buf.getvalue()
@@ -382,10 +456,8 @@
     device = next(chatbot.model.parameters()).device
     dtype = torch.float16
 
-    #
+    # Preprocess image → tensor
    expected_size = get_vision_expected_size(chatbot.model, default=336)
-    dbg(f"[pre] dynamic expected_size={expected_size} | processor={type(chatbot.image_processor)}")
-
    image_tensor = None
    try:
        if hasattr(chatbot.image_processor, "preprocess"):
@@ -396,11 +468,10 @@
             if image_tensor.ndim == 3:
                 image_tensor = image_tensor.unsqueeze(0)
             image_tensor = image_tensor.to(device=device, dtype=dtype)
-            dbg(f"[pre] processor.preprocess ok → {tuple(image_tensor.shape)}")
         else:
             raise AttributeError("processor has no preprocess")
-    except Exception
-
+    except Exception:
+        # Fallback chain: process_images → manual CLIP norm
         try:
             processed = process_images([pil_img], chatbot.image_processor, chatbot.model.config)
             if isinstance(processed, (list, tuple)) and len(processed) > 0:
@@ -409,13 +480,10 @@
                 image_tensor = processed[0] if processed.ndim == 4 else processed
             else:
                 raise ValueError("process_images returned empty")
-
             if image_tensor.ndim == 3:
                 image_tensor = image_tensor.unsqueeze(0)
             image_tensor = image_tensor.to(device=device, dtype=dtype)
-
-        except Exception as e_proc:
-            warn(f"[pre] process_images failed: {e_proc} → manual CLIP fallback (dynamic size).")
+        except Exception:
             from torchvision import transforms
             from torchvision.transforms import InterpolationMode
             preprocess = transforms.Compose([
@@ -428,19 +496,17 @@
                 ),
             ])
             image_tensor = preprocess(pil_img).unsqueeze(0).to(device=device, dtype=dtype)
-            dbg(f"[pre] manual fallback ok → {tuple(image_tensor.shape)}")
 
     if image_tensor is None:
         return {"error": "Image processing failed (no tensor produced)"}
 
-    #
+    # Prompt selection
     base_msg = (message_text or "").strip()
-    if output_mode
+    if output_mode in ("json", "report_en"):
         msg = f"{base_msg}\n\n{JSON_SCHEMA_HINT_EN}"
-    else: #
+    else:  # narrative
         msg = f"{base_msg}\n\n{STYLE_HINT}"
 
-    dbg(f"[prompt] conv_sep_style={chatbot.conversation.sep_style} sep_len={len(chatbot.conversation.sep)}")
     _, input_ids = _build_prompt_and_ids(chatbot, msg, device)
 
     stop_str = chatbot.conversation.sep if chatbot.conversation.sep_style != SeparatorStyle.TWO else chatbot.conversation.sep2
@@ -457,7 +523,6 @@
         pass
 
     streamer = TextIteratorStreamer(chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True)
-
     gen_kwargs = dict(
         inputs=input_ids,
         images=image_tensor,
@@ -471,19 +536,19 @@
         stopping_criteria=[stopping],
     )
 
+    # Generate
     try:
         t = Thread(target=chatbot.model.generate, kwargs=gen_kwargs)
         t.start()
         chunks = []
         for piece in streamer:
             chunks.append(piece)
-        text = "".join(chunks)
-        text = _postprocess_min(text)
+        text = _postprocess_min("".join(chunks))
         chatbot.conversation.messages[-1][-1] = text
     except Exception as e:
         return {"error": f"Generation failed: {e}"}
 
-    #
+    # Log
     try:
         row = {
             "time": datetime.datetime.now().isoformat(),
@@ -499,24 +564,42 @@
     except Exception as e:
         warn(f"[log] failed: {e}")
 
-    #
+    # Output modes
+    if output_mode == "narrative":
+        return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
+
+    # For json & report_en we need to parse JSON once
+    try:
+        start = text.find("{"); end = text.rfind("}")
+        if start == -1 or end == -1 or end <= start:
+            return {"error": "JSON block not found", "raw": text}
+        data = json.loads(text[start:end+1])
+    except Exception as e:
+        return {"error": f"JSON parse failed: {e}", "raw": text}
+
+    # Inject patient metadata (not sent to model; used for deterministic narrative)
+    if patient_age_group:
+        data["patient_age_group"] = patient_age_group
+    if patient_sex:
+        data["patient_sex"] = patient_sex
+
     if output_mode == "json":
-
-        start = text.find("{"); end = text.rfind("}")
-        if start != -1 and end != -1 and end > start:
-            obj = json.loads(text[start:end+1])
-        else:
-            return {"error": "JSON block not found", "raw": text}
-    except Exception as e:
-        return {"error": f"JSON parse failed: {e}", "raw": text}
-    return {"status": "success", "response": obj, "conversation_id": id(chatbot.conversation)}
+        return {"status": "success", "response": data, "conversation_id": id(chatbot.conversation)}
 
-
+    if output_mode == "report_en":
+        narrative = render_ecg_narrative_en(data)
+        table_txt = render_ecg_table_en(data)
+        return {
+            "status": "success",
+            "report": {"table_text": table_txt, "json": data, "narrative": narrative},
+            "conversation_id": id(chatbot.conversation)
+        }
+
+    # Fallback
     return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
 
 # ===================== Public API =====================
 def query(payload: dict):
-    """HF Endpoint entry (demo-like)."""
     global model_initialized, tokenizer, model, image_processor, context_len, args
     if not model_initialized:
         if not initialize_model():
@@ -536,7 +619,11 @@ def query(payload: dict):
 
     conv_mode_override = payload.get("conv_mode", None)
     det_seed = payload.get("det_seed", None)
-    output_mode = payload.get("output_mode", "narrative")
+    output_mode = payload.get("output_mode", "narrative")
+
+    # Optional patient meta
+    patient_age_group = payload.get("patient_age_group")
+    patient_sex = payload.get("patient_sex")
 
     if det_seed is not None:
         try: det_seed = int(det_seed)
@@ -552,6 +639,8 @@ def query(payload: dict):
             repetition_penalty=repetition_penalty,
             det_seed=det_seed,
             output_mode=output_mode,
+            patient_age_group=patient_age_group,
+            patient_sex=patient_sex,
         )
     except Exception as e:
         return {"error": f"Query failed: {e}"}
@@ -600,7 +689,6 @@ def initialize_model():
         tokenizer_, model_, image_processor_, context_len_ = load_pretrained_model(
             args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
         )
-        dbg(f"[init] load_pretrained_model ok | tokenizer={type(tokenizer_)} | model={type(model_)} | image_processor={type(image_processor_)} | context_len={context_len_}")
 
         try:
             _ = next(model_.parameters()).device
@@ -608,53 +696,17 @@ def initialize_model():
             if torch.cuda.is_available():
                 model_ = model_.to(torch.device("cuda"))
         model_.eval()
-        dbg(f"[init] device={next(model_.parameters()).device}, cuda_available={torch.cuda.is_available()}")
 
-        # Vision tower expected image size
         expected_size = get_vision_expected_size(model_, default=336)
-
-
-
-
-
-
-
-
-
-            dbg("[init] image_processor: AutoProcessor.from_pretrained(model_path) loaded.")
-        except Exception as _e1:
-            dbg(f"[init] AutoProcessor(model_path) failed: {_e1}")
-            try:
-                from transformers import AutoProcessor
-                clip_id = "openai/clip-vit-large-patch14-336" if expected_size >= 336 else "openai/clip-vit-large-patch14"
-                image_processor_ = AutoProcessor.from_pretrained(clip_id)
-                dbg(f"[init] AutoProcessor({clip_id}) loaded.")
-            except Exception as _e2:
-                from transformers import CLIPImageProcessor
-                clip_id = "openai/clip-vit-large-patch14-336" if expected_size >= 336 else "openai/clip-vit-large-patch14"
-                image_processor_ = CLIPImageProcessor.from_pretrained(clip_id)
-                warn(f"[init] CLIPImageProcessor({clip_id}) fallback in use.")
-        except Exception as _e:
-            warn(f"[init] image_processor fallback chain failed: {_e}")
-
-        # Force processor sizes to match tower
-        try:
-            if image_processor_ is not None:
-                force_processor_size(image_processor_, expected_size)
-        except Exception as e_ip:
-            warn(f"[init] processor size set error: {e_ip}")
-
-        # Processor introspection
-        try:
-            ip = image_processor_
-            if ip is not None:
-                crop_sz = getattr(getattr(ip, "crop_size", None), "height", None) or getattr(ip, "crop_size", None)
-                size_sz = getattr(getattr(ip, "size", None), "shortest_edge", None) or getattr(ip, "size", None)
-                dbg(f"[init] image_processor crop_size={crop_sz} size={size_sz} class={ip.__class__.__name__}")
-            else:
-                warn("[init] image_processor still None (fallback failed).")
-        except Exception as e_ip2:
-            warn(f"[init] image_processor inspect error: {e_ip2}")
+        if image_processor_ is None:
+            try:
+                from transformers import AutoProcessor
+                image_processor_ = AutoProcessor.from_pretrained(args.model_path)
+            except Exception:
+                from transformers import CLIPImageProcessor
+                clip_id = "openai/clip-vit-large-patch14-336" if expected_size >= 336 else "openai/clip-vit-large-patch14"
+                image_processor_ = CLIPImageProcessor.from_pretrained(clip_id)
+        force_processor_size(image_processor_, expected_size)
 
         globals()["tokenizer"] = tokenizer_
         globals()["model"] = model_
@@ -662,51 +714,15 @@ def initialize_model():
         globals()["context_len"] = context_len_
 
         chat_manager.init_if_needed(args, args.model_path, tokenizer_, model_, image_processor_, context_len_)
-        print("[init] model/tokenizer/image_processor loaded.")
+        print("[init] model/tokenizer/image_processor loaded.")
         return True
     except Exception as e:
         warn(f"[init] failed: {e}")
         return False
 
-# =====================
-def render_ecg_table_en(d: Dict[str, Any]) -> str:
-    def g(k, default="—"):
-        v = d.get(k, None)
-        if v is None: return default
-        return str(v)
-
-    hr = g("heart_rate_bpm")
-    rhythm = g("rhythm")
-    axis = g("qrs_axis")
-    p = g("p_waves")
-    pr = g("pr_interval_ms")
-    qrs_dur = g("qrs_duration_ms")
-    t = g("t_waves")
-    qtc = g("qtc_ms")
-    qtc_c = g("qtc_comment")
-    extra = g("additional_comments")
-
-    lines = [
-        "ECG ANALYSIS",
-        "────────────",
-        f"Heart rate : {hr} beats/min",
-        f"Rhythm : {rhythm}",
-        f"QRS axis : {axis}",
-        f"P waves : {p}",
-        f"PR interval : {pr} ms",
-        f"QRS duration : {qrs_dur} ms",
-        f"T waves : {t}",
-        f"QTc : {qtc_c} ({qtc} ms)",
-        "",
-        "Additional comments",
-        "──────────────────",
-        f"{extra}"
-    ]
-    return "\n".join(lines)
-
-# ===================== HF EndpointHandler =====================
+# ===================== EndpointHandler =====================
 class EndpointHandler:
-    """Hugging Face Endpoint
+    """Hugging Face Endpoint compatible"""
     def __init__(self, model_dir):
         self.model_dir = model_dir
         print(f"EndpointHandler initialized with model_dir: {model_dir}")
@@ -720,9 +736,9 @@ class EndpointHandler:
         return get_model_info()
 
 if __name__ == "__main__":
-    print("Handler ready (
+    print("Handler ready (Deterministic JSON→Narrative, age+sex aware). Use `EndpointHandler` or `query`.")
 
-# =====================
+# ===================== FastAPI Wrapper =====================
 try:
     from fastapi import FastAPI
     from pydantic import BaseModel
@@ -732,7 +748,7 @@ except Exception as e:
     warn(f"fastapi/pydantic not available: {e}")
 
 if FASTAPI_AVAILABLE:
-    app = FastAPI(title="PULSE ECG Handler API", version="1.
+    app = FastAPI(title="PULSE ECG Handler API", version="1.2.0")
 
     class QueryIn(BaseModel):
         message: str | None = None
@@ -750,7 +766,9 @@ if FASTAPI_AVAILABLE:
         repetition_penalty: float | None = None
         conv_mode: str | None = None
        det_seed: int | None = None
-        output_mode: str | None = None
+        output_mode: str | None = None
+        patient_age_group: str | None = None
+        patient_sex: str | None = None
 
     @app.on_event("startup")
     async def _startup():
@@ -767,39 +785,6 @@ if FASTAPI_AVAILABLE:
     async def _info():
         return get_model_info()
 
-    @app.get("/debug")
-    async def _debug():
-        try:
-            dev = str(next(model.parameters()).device) if model else "Unknown"
-        except Exception:
-            dev = "Unknown"
-
-        try:
-            ip = image_processor
-            ip_cls = ip.__class__.__name__ if ip else None
-            crop_sz = getattr(getattr(ip, "crop_size", None), "height", None) or getattr(ip, "crop_size", None)
-            size_short = getattr(getattr(ip, "size", None), "shortest_edge", None) or getattr(ip, "size", None)
-        except Exception:
-            ip_cls, crop_sz, size_short = None, None, None
-
-        try:
-            ve = get_vision_expected_size(model, default=None) if model else None
-        except Exception:
-            ve = None
-
-        return {
-            "debug": bool(DEBUG),
-            "llava_available": LLAVA_AVAILABLE,
-            "transformers_available": TRANSFORMERS_AVAILABLE,
-            "device": dev,
-            "context_len": context_len,
-            "image_processor_class": ip_cls,
-            "image_processor_crop_size": crop_sz,
-            "image_processor_size": {"shortest_edge": size_short},
-            "vision_expected_image_size": ve,
-            "model_path": args.model_path if args else None,
-        }
-
     @app.post("/query")
     async def _query(payload: QueryIn):
         return query({k: v for k, v in payload.dict().items() if v is not None})
@@ -816,4 +801,5 @@ if FASTAPI_AVAILABLE:
         data["output_mode"] = "report_en"
         return query(data)
 else:
-    app = None
+    app = None
+
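
For reference, a minimal client sketch against the new report_en flow. Two assumptions not shown in this diff: the FastAPI app is served locally on port 8000, and the QueryIn payload carries the ECG image base64-encoded under an "image" field (the exact image field name falls outside the hunks above).

import base64
import requests

# Read and base64-encode a local ECG image (load_image_any also accepts
# URLs, file paths, and data URLs).
with open("ecg.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

payload = {
    "message": "Please analyze this ECG.",
    "image": img_b64,               # assumed field name, not shown in this diff
    "patient_age_group": "65+",     # "0-15" | "15-65" | "65+"
    "patient_sex": "female",        # "male" | "female"
}

# /analyze/report-en forces output_mode="report_en" server-side, so the
# response carries the parsed JSON plus both deterministic renderings.
r = requests.post("http://localhost:8000/analyze/report-en", json=payload)
report = r.json()["report"]
print(report["table_text"])         # deterministic table (render_ecg_table_en)
print(report["narrative"])          # age/sex-aware narrative (render_ecg_narrative_en)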