Update handler.py
handler.py  CHANGED  (+34, -185)
@@ -1,31 +1,25 @@
 # -*- coding: utf-8 -*-
-# handler.py — PULSE-7B / LLaVA
-# - LLaVA
+# handler.py — PULSE-7B / LLaVA endpoint (with mm_utils_local)
+# - fetches the LLaVA sources at runtime via git clone (model builder, conv, constants)
+# - image handling: mm_utils_local.process_images / tokenizer_image_token
 # - image_processor fallback (AutoProcessor / vision_tower)
-# - anyres -> safe fallback to pad
-# - abstracts the preprocess/__call__ difference
-# - attention_mask always set (HF generate NoneType.new_ones fix)
+# - anyres -> safe fallback to pad (mm_utils_local is already robust)
 # - forward patch (silently drop cache_position/input_positions)
-# -
+# - attention_mask: sent only if the model supports it (conditional, avoids unused-kwargs errors)
 
-import os, io, sys, subprocess, base64
+import os, io, sys, subprocess, base64, inspect
 from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 from PIL import Image
 import requests
-import math
 
-# =====
+# ===== Model ID =====
 MODEL_ID = os.getenv("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
 
-#
-os.environ.setdefault("FLASH_ATTENTION", "1")
-os.environ.setdefault("ATTN_IMPLEMENTATION", "flash_attention_2")
-
-# ===== Fetch the LLaVA source code at runtime (no pip package!) =====
+# ===== Fetch the LLaVA sources at runtime =====
 LLAVA_GIT_URL = os.getenv("LLAVA_GIT_URL", "https://github.com/haotian-liu/LLaVA.git")
 LLAVA_GIT_REF = os.getenv("LLAVA_GIT_REF", "v1.2.2.post1")
 LLAVA_SRC_DIR = os.getenv("LLAVA_SRC_DIR", "/tmp/llava_src/LLaVA")
 
 def _ensure_llava():
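The body of _ensure_llava() falls outside this diff. A minimal sketch of what such a runtime fetch usually looks like, assuming it only clones the pinned ref and puts the checkout on sys.path (the committed implementation may differ in details such as error handling):

import os, subprocess, sys

LLAVA_GIT_URL = os.getenv("LLAVA_GIT_URL", "https://github.com/haotian-liu/LLaVA.git")
LLAVA_GIT_REF = os.getenv("LLAVA_GIT_REF", "v1.2.2.post1")
LLAVA_SRC_DIR = os.getenv("LLAVA_SRC_DIR", "/tmp/llava_src/LLaVA")

def _ensure_llava():
    # Sketch: clone the pinned LLaVA tag once, then make the repo importable.
    if not os.path.isdir(os.path.join(LLAVA_SRC_DIR, "llava")):
        subprocess.check_call(["git", "clone", "--depth", "1", "--branch", LLAVA_GIT_REF,
                               LLAVA_GIT_URL, LLAVA_SRC_DIR])
    if LLAVA_SRC_DIR not in sys.path:
        sys.path.insert(0, LLAVA_SRC_DIR)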
@@ -51,168 +45,16 @@ from llava.constants import (
 from llava.conversation import conv_templates
 from llava.utils import disable_torch_init
 
+# ---- mm_utils_local (your file) ----
+from mm_utils_local import (
+    tokenizer_image_token,
+    process_images,
+    get_model_name_from_path,
+)
+
 # HF processor fallbacks
 from transformers import AutoProcessor, AutoImageProcessor, CLIPImageProcessor
 
-# ==========================
-# Helper functions
-# ==========================
-
-def get_model_name_from_path(model_path: str) -> str:
-    p = model_path.strip("/").split("/")
-    return (p[-2] + "_" + p[-1]) if p[-1].startswith("checkpoint-") else p[-1]
-
-def load_image_from_base64(image: str) -> Image.Image:
-    return Image.open(io.BytesIO(base64.b64decode(image)))
-
-def expand2square(pil_img: Image.Image, background_color: Tuple[int,int,int]) -> Image.Image:
-    w, h = pil_img.size
-    if w == h:
-        return pil_img
-    if w > h:
-        result = Image.new(pil_img.mode, (w, w), background_color); result.paste(pil_img, (0, (w - h)//2)); return result
-    result = Image.new(pil_img.mode, (h, h), background_color); result.paste(pil_img, ((h - w)//2, 0)); return result
-
-def select_best_resolution(original_size: Tuple[int,int], possible_resolutions: List[Tuple[int,int]]) -> Tuple[int,int]:
-    ow, oh = original_size
-    best, max_eff, min_waste = None, 0, float("inf")
-    for W, H in possible_resolutions:
-        s = min(W/ow, H/oh)
-        dw, dh = int(ow*s), int(oh*s)
-        eff = min(dw*dh, ow*oh)
-        waste = (W*H) - eff
-        if (eff > max_eff) or (eff == max_eff and waste < min_waste):
-            max_eff, min_waste, best = eff, waste, (W, H)
-    return best
-
-def resize_and_pad_image(image: Image.Image, target_resolution: Tuple[int,int]) -> Image.Image:
-    ow, oh = image.size
-    W, H = target_resolution
-    sw, sh = W/ow, H/oh
-    if sw < sh:
-        nw, nh = W, min(math.ceil(oh*sw), H)
-    else:
-        nh, nw = H, min(math.ceil(ow*sh), W)
-    resized = image.resize((nw, nh))
-    canvas = Image.new("RGB", (W, H), (0,0,0))
-    canvas.paste(resized, ((W - nw)//2, (H - nh)//2))
-    return canvas
-
-def pad_to_multiple(image: Image.Image, multiple: int) -> Image.Image:
-    w, h = image.size
-    W = math.ceil(w / multiple) * multiple
-    H = math.ceil(h / multiple) * multiple
-    if (W, H) == (w, h):
-        return image
-    canvas = Image.new(image.mode, (W, H), (0,0,0))
-    canvas.paste(image, (0,0))
-    return canvas
-
-def divide_to_patches(image: Image.Image, patch_size: int) -> List[Image.Image]:
-    patches = []
-    W, H = image.size
-    for y in range(0, H, patch_size):
-        for x in range(0, W, patch_size):
-            patches.append(image.crop((x, y, x+patch_size, y+patch_size)))
-    return patches
-
-def _get_crop_size(processor: Any, default: int = 224) -> int:
-    cs = getattr(processor, "crop_size", None)
-    if cs is None:
-        sz = getattr(processor, "size", None)
-        if isinstance(sz, dict): return int(sz.get("shortest_edge", default))
-        if isinstance(sz, int): return int(sz)
-        return int(default)
-    if isinstance(cs, dict):
-        if "height" in cs: return int(cs["height"])
-        if "shortest_edge" in cs: return int(cs["shortest_edge"])
-        for v in cs.values(): return int(v)
-    return int(cs)
-
-def _get_shortest_edge(processor: Any, fallback: Optional[int] = None) -> int:
-    sz = getattr(processor, "size", None)
-    if isinstance(sz, dict) and "shortest_edge" in sz: return int(sz["shortest_edge"])
-    if isinstance(sz, int): return int(sz)
-    return _get_crop_size(processor, default=(fallback or 224))
-
-def _preprocess_one(processor: Any, img: Image.Image) -> torch.Tensor:
-    if hasattr(processor, "preprocess"):
-        out = processor.preprocess(img, return_tensors="pt")
-    else:
-        out = processor(img, return_tensors="pt")
-    return out["pixel_values"][0]
-
-def process_anyres_image(image: Image.Image, processor: Any, grid_pinpoints: Any) -> torch.Tensor:
-    if isinstance(grid_pinpoints, list):
-        poss = grid_pinpoints
-    else:
-        import ast
-        poss = ast.literal_eval(grid_pinpoints)
-    patch_size = _get_crop_size(processor, 224)
-    shortest = _get_shortest_edge(processor, fallback=patch_size)
-    best = select_best_resolution(image.size, poss)
-    padded = resize_and_pad_image(image, best)
-    padded = pad_to_multiple(padded, patch_size)
-    patches = divide_to_patches(padded, patch_size)
-    resized_orig = image.resize((shortest, shortest))
-    tensors = [_preprocess_one(processor, resized_orig)] + [_preprocess_one(processor, p) for p in patches]
-    return torch.stack(tensors, dim=0)
-
-def process_images(images: List[Image.Image], image_processor: Any, model_cfg: Any) -> torch.Tensor:
-    iar = getattr(model_cfg, "image_aspect_ratio", None) or getattr(model_cfg, "mm_image_aspect_ratio", None)
-    new_images: List[torch.Tensor] = []
-
-    if iar == "pad":
-        for img in images:
-            img_mean = getattr(image_processor, "image_mean", [0.5,0.5,0.5])
-            bg = tuple(int(x*255) for x in img_mean)
-            sq = expand2square(img, bg)
-            new_images.append(_preprocess_one(image_processor, sq))
-
-    elif iar == "anyres":
-        grid = getattr(model_cfg, "image_grid_pinpoints", "[(336,336)]")
-        for img in images:
-            new_images.append(process_anyres_image(img, image_processor, grid))
-
-    else:
-        # if the batched call fails, try the images one by one
-        try:
-            out = image_processor(images, return_tensors="pt")
-            return out["pixel_values"]
-        except TypeError:
-            outs = [image_processor(im, return_tensors="pt") for im in images]
-            pix = [o["pixel_values"][0] for o in outs]
-            return torch.stack(pix, dim=0)
-
-    if all(x.shape == new_images[0].shape for x in new_images):
-        return torch.stack(new_images, dim=0)
-    return new_images
-
-def tokenizer_image_token(prompt: str, tokenizer: Any, image_token_index: int = IMAGE_TOKEN_INDEX,
-                          return_tensors: Optional[str] = None):
-    chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<image>")]
-
-    def insert_sep(X, sep):
-        return [e for sub in zip(X, [sep]*len(X)) for e in sub][:-1]
-
-    ids: List[int] = []
-    offset = 0
-    if len(chunks) > 0 and len(chunks[0]) > 0 and chunks[0][0] == tokenizer.bos_token_id:
-        offset = 1
-        ids.append(chunks[0][0])
-
-    for x in insert_sep(chunks, [image_token_index]*(offset+1)):
-        ids.extend(x[offset:])
-
-    if return_tensors is not None:
-        if return_tensors == "pt":
-            return torch.tensor(ids, dtype=torch.long)
-        raise ValueError(f"Unsupported tensor type: {return_tensors}")
-    return ids
-
-# ==========================
-# Endpoint Handler
-# ==========================
 
 class EndpointHandler:
     """
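mm_utils_local itself is not part of this commit; the new import only assumes it exposes tokenizer_image_token, process_images, and get_model_name_from_path. A minimal sketch of such a module, assuming it simply hosts the helpers deleted above (the actual file may differ):

# mm_utils_local.py — hypothetical minimal layout
import io, base64
from PIL import Image

def get_model_name_from_path(model_path: str) -> str:
    # Same logic as the helper removed from handler.py above.
    p = model_path.strip("/").split("/")
    return (p[-2] + "_" + p[-1]) if p[-1].startswith("checkpoint-") else p[-1]

def load_image_from_base64(image: str) -> Image.Image:
    return Image.open(io.BytesIO(base64.b64decode(image)))

# process_images(...) and tokenizer_image_token(...) would carry over unchanged
# from the blocks removed in this commit.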
@@ -237,20 +79,19 @@ class EndpointHandler:
             model_path = os.getenv("HF_MODEL_ID").strip()
         else:
             model_path = MODEL_ID
-
         if not model_path:
             raise RuntimeError("Model path belirlenemedi. HF_MODEL_LOCAL_DIR / HF_MODEL_ID / MODEL_ID ayarla.")
 
         self.model_name = get_model_name_from_path(model_path)
 
-        # Attention implementation
+        # Choose the attention implementation
         try:
             import flash_attn  # noqa: F401
             attn_impl = "flash_attention_2"
         except Exception:
             attn_impl = "sdpa"
 
-        #
+        # Load the model
         self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
             model_path=model_path,
             model_base=None,

@@ -261,7 +102,7 @@
         )
         self.model.eval()
 
-        # ---- forward patch
+        # ---- forward patch: compatibility with newer HF arguments ----
         def _patch_forward(obj, label="model"):
             try:
                 if not hasattr(obj, "forward"): return False
@@ -309,12 +150,20 @@
 
         # multimodal flags
         self.use_im_start_end = getattr(self.model.config, "mm_use_im_start_end", False)
-        self.is_multimodal = 'llava' in self.model_name.lower() or 'pulse' in self.model_name.lower()
+        self.is_multimodal = ('llava' in self.model_name.lower()) or ('pulse' in self.model_name.lower())
 
         # Defaults
         self.DEFAULT_CONV_MODE = os.getenv("LLAVA_CONV_MODE", "llava_v1")
         self.MAX_NEW_TOKENS_DEF = int(os.getenv("MAX_NEW_TOKENS", "1024"))
 
+        # Detect attention_mask support once
+        self._supports_attention_mask = False
+        try:
+            sig = inspect.signature(self.model.forward)
+            self._supports_attention_mask = ("attention_mask" in sig.parameters)
+        except Exception:
+            self._supports_attention_mask = False
+
         # -------------------------
         # Internal helpers
         # -------------------------
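The new capability check leans on inspect.signature; a tiny self-contained illustration of the same pattern on a toy module (not the PULSE/LLaVA classes):

import inspect
import torch

class ToyModel(torch.nn.Module):
    def forward(self, input_ids, attention_mask=None):
        return input_ids

# True -> the handler would include attention_mask in its generate kwargs
supports_mask = "attention_mask" in inspect.signature(ToyModel().forward).parameters
print(supports_mask)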
@@ -368,7 +217,6 @@
             image_sizes = [pil_image.size]
 
             processed_images = process_images(images_list, self.image_processor, self.model.config)
-            # tensor/list to device + dtype
             if isinstance(processed_images, list):
                 images = [img.to(self.model.device, dtype=torch.float16) for img in processed_images]
             else:

@@ -388,7 +236,7 @@
             import traceback; traceback.print_exc()
             images = None; image_sizes = None
 
-        # 3) Tokenization
+        # 3) Tokenization
         try:
             input_ids = tokenizer_image_token(
                 prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt'

@@ -399,6 +247,7 @@
             input_ids = enc.input_ids.to(self.model.device)
             images = None; image_sizes = None
 
+        # attention_mask: build it here; added to gen_kwargs only if the model supports it
         attention_mask = torch.ones_like(input_ids, dtype=torch.long, device=input_ids.device)
 
         # 4) Generation params
@@ -408,7 +257,6 @@
         max_new_tokens = min(int(params.get("max_new_tokens", self.MAX_NEW_TOKENS_DEF)), 1024)
         do_sample = bool(params.get("do_sample", temperature > 0.001))
 
-        # Context length limit (safety margin)
         max_context_length = getattr(self.model.config, 'max_position_embeddings', 4096)
         max_new_tokens = min(max_new_tokens, max(1, max_context_length - input_ids.shape[-1] - 50))
         if max_new_tokens < 1:

@@ -417,7 +265,6 @@
         # 5) Gen kwargs
         gen_kwargs: Dict[str, Any] = {
             "inputs": input_ids,
-            "attention_mask": attention_mask,
             "max_new_tokens": max_new_tokens,
             "temperature": temperature,
             "top_p": top_p,

@@ -426,6 +273,8 @@
             "use_cache": bool(params.get("use_cache", True)),
             "pad_token_id": self.tokenizer.eos_token_id,
         }
+        if self._supports_attention_mask:
+            gen_kwargs["attention_mask"] = attention_mask
 
         if images is not None and image_sizes is not None:
             gen_kwargs["images"] = images

@@ -439,9 +288,9 @@
         if prompt_clean != prompt:
             try:
                 input_ids = self.tokenizer(prompt_clean, return_tensors="pt").input_ids.to(self.model.device)
-                attention_mask = torch.ones_like(input_ids, dtype=torch.long, device=input_ids.device)
                 gen_kwargs["inputs"] = input_ids
-
+                if self._supports_attention_mask:
+                    gen_kwargs["attention_mask"] = torch.ones_like(input_ids, dtype=torch.long, device=input_ids.device)
             except Exception as e:
                 print(f"[warn] prompt cleanup failed: {e}")
                 print("[info] Text-only generation.")
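The generate call and decoding sit outside the hunks shown here. For context, a minimal sketch of the step that typically follows, assuming gen_kwargs as assembled above (not taken from this commit):

with torch.inference_mode():
    output_ids = self.model.generate(**gen_kwargs)
text = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()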