Update handler.py

handler.py  +76 -59  CHANGED

@@ -1,11 +1,10 @@
  # -*- coding: utf-8 -*-
- # handler.py — PULSE-7B / LLaVA robust endpoint (…
- # - …
- # - Safe image …
- # - …
- # - …
- # - …
- # - don't send attention_mask (LLaVA handles it internally)
+ # handler.py — PULSE-7B / LLaVA robust endpoint (final fix)
+ # - Source: AIMedLab/PULSE (dev) LLaVA fork
+ # - Safe image load + processor normalization
+ # - Build a FULL attention_mask
+ # - Generate via HF GenerationMixin (bypassing LLaVA's generate override)
+ # - forward() patch: drop cache_position/input_positions

  import os, io, sys, subprocess, base64
  from typing import Any, Dict, List, Optional, Tuple
@@ -18,15 +17,15 @@ import ast
  import inspect
  from urllib.parse import urlparse

- # ===== Model/…
+ # ===== Model / Config =====
  MODEL_ID = os.getenv("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
  DEFAULT_VISION_TOWER_ID = os.getenv("HF_VISION_TOWER_ID", "openai/clip-vit-large-patch14-336")

- # …
+ # Flash Attention
  os.environ.setdefault("FLASH_ATTENTION", "1")
  os.environ.setdefault("ATTN_IMPLEMENTATION", "flash_attention_2")

- # ===== …
+ # ===== Fetch the LLaVA (AIMedLab/PULSE dev) source =====
  LLAVA_GIT_URL = os.getenv("LLAVA_GIT_URL", "https://github.com/AIMedLab/PULSE.git")
  LLAVA_GIT_REF = os.getenv("LLAVA_GIT_REF", "dev")
  LLAVA_SRC_DIR = os.getenv("LLAVA_SRC_DIR", "/tmp/llava_src/PULSE/LLaVA")
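Note: the next hunk's context references an _ensure_llava() helper whose body is unchanged and therefore not shown in this diff. A minimal sketch of what such a helper typically does with the LLAVA_GIT_* settings above (an assumption, not the verbatim handler.py code):

import os, subprocess, sys

def _ensure_llava():
    # Fetch the PULSE fork at the pinned ref if absent, then make its
    # LLaVA package importable. (Sketch only; the real body is elided.)
    repo_root = os.path.dirname(LLAVA_SRC_DIR)  # /tmp/llava_src/PULSE
    if not os.path.isdir(repo_root):
        subprocess.check_call(["git", "clone", "--depth", "1", "--branch",
                               LLAVA_GIT_REF, LLAVA_GIT_URL, repo_root])
    if LLAVA_SRC_DIR not in sys.path:
        sys.path.insert(0, LLAVA_SRC_DIR)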
@@ -48,6 +47,7 @@ _ensure_llava()
  try:
      from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path, load_image_from_base64
  except Exception:
+     # Minimal fallbacks
      from llava.constants import IMAGE_TOKEN_INDEX

      def expand2square(pil_img: Image.Image, background_color: Tuple[int,int,int]) -> Image.Image:
@@ -129,15 +129,12 @@ except Exception:
          chunks = [tokenizer(chunk).input_ids for chunk in prompt.split('<image>')]
          def insert_sep(X, sep):
              return [e for sub in zip(X, [sep]*len(X)) for e in sub][:-1]
-         ids = []
-         offset = 0
+         ids = []; offset = 0
          if len(chunks) > 0 and len(chunks[0]) > 0 and chunks[0][0] == tokenizer.bos_token_id:
-             offset = 1
-             …
-         for x in insert_sep(chunks, [IMAGE_TOKEN_INDEX]*(offset+1)):
+             offset = 1; ids.append(chunks[0][0])
+         for x in insert_sep(chunks, [image_token_index]*(offset+1)):
              ids.extend(x[offset:])
-         if return_tensors == 'pt':
-             return torch.tensor(ids, dtype=torch.long)
+         if return_tensors == 'pt': return torch.tensor(ids, dtype=torch.long)
          return ids

      def get_model_name_from_path(model_path):
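For intuition about the fallback above: it splits the prompt on the literal '<image>' placeholder and splices IMAGE_TOKEN_INDEX (-200 in LLaVA) between the tokenized chunks. A usage sketch, assuming a loaded tokenizer and mirroring the call handler.py itself makes later:

prompt = "USER: <image>\nDescribe this ECG. ASSISTANT:"
ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
# ids is a 1-D LongTensor: the text tokens of the chunks around '<image>',
# with the sentinel -200 spliced in where the image features get injected.
input_ids = ids.unsqueeze(0)  # (1, seq_len), as __call__ does before generating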
@@ -147,7 +144,7 @@ except Exception:
      def load_image_from_base64(image):
          return Image.open(io.BytesIO(base64.b64decode(image)))

- # ---- LLaVA …
+ # ---- LLaVA components ----
  from llava.model.builder import load_pretrained_model
  from llava.constants import (
      IMAGE_TOKEN_INDEX,
@@ -157,7 +154,10 @@ from llava.constants import (
|
|
| 157 |
)
|
| 158 |
from llava.conversation import conv_templates
|
| 159 |
from llava.utils import disable_torch_init
|
|
|
|
| 160 |
from transformers import AutoProcessor, AutoImageProcessor, CLIPImageProcessor
|
|
|
|
|
|
|
| 161 |
|
| 162 |
DEFAULT_CONV_MODE = os.getenv("LLAVA_CONV_MODE", "llava_v1")
|
| 163 |
MAX_NEW_TOKENS_DEF = int(os.getenv("MAX_NEW_TOKENS", "1024"))
|
|
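Why this import enables the bypass: calling GenerationMixin.generate(self.model, ...) as a plain class function skips any generate() the LLaVA subclass defines, while still running HF's generation loop on the same instance. The same Python mechanics in miniature (illustrative only, not from handler.py):

class Base:
    def greet(self):
        return "base"

class Child(Base):
    def greet(self):  # override, analogous to LLaVA's generate()
        return "child"

c = Child()
print(c.greet())      # "child" -> the override runs
print(Base.greet(c))  # "base"  -> explicit class call bypasses the override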
@@ -175,12 +175,14 @@ class EndpointHandler:

          self.model_name = get_model_name_from_path(model_path)

+         # attention impl
          try:
              import flash_attn  # noqa
              attn_impl = "flash_attention_2"
          except Exception:
              attn_impl = "sdpa"

+         # Load the LLaVA/PULSE model
          self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
              model_path=model_path,
              model_base=None,
@@ -204,7 +206,7 @@ class EndpointHandler:
          except Exception:
              pass

-         # forward patch: …
+         # forward patch: silently drop unknown kwargs
          def _patch_forward(obj, label="model"):
              try:
                  if not hasattr(obj, "forward"): return False
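The rest of _patch_forward is unchanged and elided by this diff. Given the inspect import and the header note about dropping cache_position/input_positions, a wrapper in this spirit would look roughly like the following (a sketch under those assumptions, not the verbatim body):

import inspect, functools

def _patch_forward(obj, label="model"):
    try:
        if not hasattr(obj, "forward"): return False
        orig = obj.forward
        allowed = set(inspect.signature(orig).parameters)
        @functools.wraps(orig)
        def forward(*args, **kwargs):
            # silently drop kwargs the wrapped forward() does not accept,
            # e.g. cache_position injected by newer transformers versions
            return orig(*args, **{k: v for k, v in kwargs.items() if k in allowed})
        obj.forward = forward
        print(f"[info] patched forward on {label}")
        return True
    except Exception:
        return False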
@@ -234,7 +236,7 @@ class EndpointHandler:
          except Exception as e:
              print(f"[warn] AutoProcessor failed: {e}")
              vt_id = self._resolve_vision_tower_id(self.model.config)
-             print(f"[hotfix] trying …
+             print(f"[hotfix] trying vision_tower: {vt_id}")
              try:
                  self.image_processor = AutoImageProcessor.from_pretrained(vt_id, trust_remote_code=True)
                  print("[info] image_processor loaded via AutoImageProcessor(vision_tower)")
@@ -263,14 +265,14 @@ class EndpointHandler:
          self.use_im_start_end = getattr(self.model.config, "mm_use_im_start_end", False)
          self.is_multimodal = ('llava' in self.model_name.lower()) or ('pulse' in self.model_name.lower())

-     # …
+     # ---------- helpers ----------
      def _resolve_vision_tower_id(self, config: Any) -> str:
          for key in ("mm_vision_tower", "vision_tower", "mm_vision_tower_name", "image_tower", "visual_encoder"):
              v = getattr(config, key, None)
              if isinstance(v, str) and v.strip(): return v.strip()
          try:
-             …
-             name = getattr(getattr(…
+             vt = getattr(config, "vision_tower", None)
+             name = getattr(getattr(vt, "config", None), "_name_or_path", None)
              if isinstance(name, str) and name.strip(): return name.strip()
          except Exception:
              pass
@@ -318,6 +320,7 @@ class EndpointHandler:
              return True

          try:
+             # URL
              if isinstance(image_input, str) and image_input.startswith(("http://", "https://")):
                  if not _is_valid_image_format(image_input):
                      print("[warn] Invalid image extension in URL"); return None
@@ -333,6 +336,7 @@ class EndpointHandler:
                  img = Image.open(io.BytesIO(data)).convert("RGB")
                  print(f"[info] URL image loaded: size={img.size}"); return img

+             # Base64 (including data URLs)
              if isinstance(image_input, str):
                  b64 = image_input.strip()
                  if b64.startswith("data:image"):
@@ -346,6 +350,7 @@ class EndpointHandler:
                  img = Image.open(io.BytesIO(data)).convert("RGB")
                  print(f"[info] Base64 image loaded: size={img.size}"); return img

+             # Local path
              if isinstance(image_input, str) and os.path.exists(image_input):
                  img = Image.open(image_input).convert("RGB")
                  print(f"[info] Local image loaded: size={img.size}"); return img
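All three branches accept a plain string, so a client can send a URL, a base64 payload (with or without the data: prefix), or a server-local path. A quick way to build the base64 form for testing (the file name is hypothetical):

import base64

with open("ecg_sample.png", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("ascii")
image_field = f"data:image/png;base64,{b64}"  # also accepted without the prefix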
@@ -362,7 +367,13 @@ class EndpointHandler:
          conv.append_message(conv.roles[1], None)
          return conv.get_prompt()

-     …
+     def _create_attention_mask(self, input_ids: torch.Tensor) -> torch.Tensor:
+         attn = torch.ones_like(input_ids, dtype=torch.long, device=input_ids.device)
+         if self.tokenizer.pad_token_id is not None:
+             attn = attn.masked_fill(input_ids == self.tokenizer.pad_token_id, 0)
+         return attn
+
+     # ---------- inference ----------
      def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
          inputs = data.get("inputs") or {}
          params = data.get("parameters") or {}
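The mask is all ones (attend everywhere) except at pad positions. A standalone check of the same logic, with pad id 0 assumed for the demo:

import torch

input_ids = torch.tensor([[1, 42, 99, 0, 0]])  # 0 = pad
attn = torch.ones_like(input_ids, dtype=torch.long)
attn = attn.masked_fill(input_ids == 0, 0)
print(attn)  # tensor([[1, 1, 1, 0, 0]])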
@@ -382,7 +393,7 @@ class EndpointHandler:
          try:
              pil_image = self._load_image(image_f)
              if pil_image is not None and self.image_processor is not None:
-                 …
+                 processed = process_images([pil_image], self.image_processor, self.model.config)
                  # model device/dtype
                  try:
                      mdev = next(self.model.parameters()).device
@@ -390,12 +401,13 @@ class EndpointHandler:
                  except Exception:
                      mdev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                  mdtype = torch.float16 if mdev.type == "cuda" else torch.float32
-                 if isinstance(…
-                     images = [img.to(mdev, dtype=mdtype) for img in …
+                 if isinstance(processed, list):
+                     images = [img.to(mdev, dtype=mdtype) for img in processed]
                  else:
-                     images = …
+                     images = processed.to(mdev, dtype=mdtype)
                  image_sizes = [pil_image.size]
-                 …
+
+                 # image token(s)
                  prompt = DEFAULT_IMAGE_TOKEN + '\n' + prompt
                  rep = DEFAULT_IMAGE_TOKEN
                  if self.use_im_start_end:
@@ -412,20 +424,19 @@ class EndpointHandler:
          # 3) tokenize
          try:
              mdev = next(self.model.parameters()).device
-             input_ids = tokenizer_image_token(…
-             …
+             input_ids = tokenizer_image_token(
+                 prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt'
+             ).unsqueeze(0).to(mdev)
              print(f"[debug] input_ids shape: {input_ids.shape} | has images: {images is not None}")
          except Exception as e:
              print(f"[error] Tokenization failed: {e}")
-             …
-             …
-             images = None; image_sizes = None
-             print("[warn] Fallback to basic tokenization without image tokens")
-             except Exception as e2:
-                 print(f"[error] Even basic tokenization failed: {e2}")
-                 return [{"generated_text": f"Error: Tokenization failed: {str(e)}"}]
+             input_ids = self.tokenizer(query_text, return_tensors="pt").input_ids.to(next(self.model.parameters()).device)
+             images = None; image_sizes = None

-         # 4) …
+         # 4) attention mask
+         attention_mask = self._create_attention_mask(input_ids)
+
+         # 5) generation params
          temperature = float(params.get("temperature", 0.0))
          top_p = float(params.get("top_p", 1.0))
          repetition_penalty = float(params.get("repetition_penalty", 1.0))
@@ -437,50 +448,56 @@ class EndpointHandler:
          if max_new_tokens < 1:
              return [{"generated_text": "Error: Input too long, exceeds max token length."}]

-         # …
-         gen_kwargs = {
-             "inputs": input_ids,
-             "input_ids": input_ids,
+         # 6) Generate via HF GenerationMixin (BYPASSING LLaVA's generate override)
+         common_params = {
              "max_new_tokens": max_new_tokens,
              "temperature": temperature,
              "top_p": top_p,
              "repetition_penalty": repetition_penalty,
              "do_sample": do_sample,
-             # don't pass attention_mask!
              "use_cache": bool(params.get("use_cache", True)),
              "pad_token_id": self.tokenizer.pad_token_id,
              "eos_token_id": getattr(self.tokenizer, "eos_token_id", None),
              "bos_token_id": getattr(self.tokenizer, "bos_token_id", None),
          }
+
+         gen_kwargs = {
+             "inputs": input_ids,               # NOTE: key is 'inputs'
+             "attention_mask": attention_mask,  # the mask goes in here
+             **common_params
+         }
          if images is not None and image_sizes is not None:
              gen_kwargs["images"] = images
              gen_kwargs["image_sizes"] = image_sizes

-         # 5) generate
          try:
              with torch.inference_mode():
-                 output = self.model.generate(**gen_kwargs)
+                 output = HFGenerationMixin.generate(self.model, **gen_kwargs)
          except Exception as e:
-             # …
-             print(f"[warn] …
-             gen_kwargs["use_cache"] = False
+             # last resort: minimal, no mask
+             print(f"[warn] HF mixin generate failed: {e} | retry minimal no-mask")
              try:
+                 minimal = {
+                     "max_new_tokens": max_new_tokens,
+                     "do_sample": False,
+                     "temperature": 0.0,
+                     "use_cache": False,
+                     "pad_token_id": self.tokenizer.pad_token_id,
+                 }
                  with torch.inference_mode():
-                     output = self.model.generate(**gen_kwargs)
+                     output = HFGenerationMixin.generate(self.model, inputs=input_ids, **minimal)
              except Exception as e2:
-                 print(f"[error] Generation failed: {e2}")
-                 import traceback; traceback.print_exc()
                  return [{"generated_text": f"Error during generation: {str(e2)}"}]

-         # …
+         # 7) decode
          try:
              sequences = output.sequences if hasattr(output, "sequences") else output
-             …
-             …
-             text = self.tokenizer.batch_decode(…
+             in_len = input_ids.shape[1]
+             resp_ids = sequences[:, in_len:] if sequences.shape[-1] > in_len else sequences
+             text = self.tokenizer.batch_decode(resp_ids, skip_special_tokens=True)[0].strip()
              if not text:
-                 text = "Error: Empty response…
+                 text = "Error: Empty response"
              return [{"generated_text": text}]
          except Exception as e:
-             print(f"[error] …
-             return [{"generated_text": f"Error …
+             print(f"[error] Decoding failed: {e}")
+             return [{"generated_text": f"Error during decoding: {str(e)}"}]
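Taken together, the handler consumes the standard Inference Endpoints envelope visible in __call__: a data dict with "inputs" and optional "parameters", returning [{"generated_text": ...}]. A request sketch; the exact keys read from "inputs" (e.g. for the query_text and image fields) are not all visible in this diff, so "query" and "image" below are assumptions:

payload = {
    "inputs": {
        "query": "What does this ECG show?",     # assumed key, see query_text
        "image": "https://example.com/ecg.png",  # URL, base64/data-URL, or local path
    },
    "parameters": {
        "temperature": 0.0,
        "top_p": 1.0,
        "repetition_penalty": 1.0,
        "max_new_tokens": 512,
    },
}
handler = EndpointHandler(MODEL_ID)  # model_path argument per __init__ above
print(handler(payload))              # -> [{"generated_text": "..."}]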
|