CanerDedeoglu/Rapid_ECG

@@ -45,13 +45,9 @@ from llava.constants import (
 from llava.conversation import conv_templates
 from llava.utils import disable_torch_init
-# Eksik fonksiyonu kaldır - artık mm_utils'ten import ediyoruz
-# def get_model_name_from_path() artık gerekli değil
 # Varsayılanlar
 DEFAULT_CONV_MODE  = os.getenv("LLAVA_CONV_MODE", "llava_v1")
 MAX_NEW_TOKENS_DEF = int(os.getenv("MAX_NEW_TOKENS", "1024"))
-# ATTN_IMPLEMENTATION artık otomatik seçiliyor, bu satırı kaldırıyoruz
 class EndpointHandler:
     """
@@ -97,8 +93,6 @@ class EndpointHandler:
         # Görsel token işaretleri (LLaVA config)
         self.use_im_start_end = getattr(self.model.config, "mm_use_im_start_end", False)
-        # Constants'tan direkt kullan
-        # self.image_token, self.im_start, self.im_end artık gerekli değil
     # ---- yardımcılar ----
     def _load_image(self, img_field: str) -> Optional[Image.Image]:
@@ -115,20 +109,24 @@ class EndpointHandler:
                 return Image.open(io.BytesIO(r.content)).convert("RGB")
             return Image.open(img_field).convert("RGB")
         except Exception as e:
-            # Görsel opsiyoneldir; okunamazsa kullanıcıya hata dönmek yerine None bırakabiliriz.
             print(f"[warn] image load failed: {e}")
             return None
-    def _build_prompt(self, user_text: str, conv_mode: str) -> str:
         if conv_mode not in conv_templates:
             conv_mode = DEFAULT_CONV_MODE
         conv = conv_templates[conv_mode].copy()
-        # Image token'ları doğru yerleştir
-        if self.use_im_start_end:
-            content = f"{DEFAULT_IM_START_TOKEN}{DEFAULT_IMAGE_TOKEN}{DEFAULT_IM_END_TOKEN}\n{user_text}"
         else:
-            content = f"{DEFAULT_IMAGE_TOKEN}\n{user_text}"
         conv.append_message(conv.roles[0], content)
         conv.append_message(conv.roles[1], None)
@@ -144,42 +142,71 @@ class EndpointHandler:
         query_text = inputs.get("query", "") or inputs.get("text", "") or inputs.get("prompt", "")
         image_f = inputs.get("image") or inputs.get("image_url") or inputs.get("image_base64")
-        # 1) prompt
-        prompt = self._build_prompt(query_text, conv_mode)
-        # 2) image -> tensor (opsiyonel)
         image_tensors = None
         if image_f:
             pil = self._load_image(image_f)
             if pil is not None:
                 try:
-                    # LLaVA'nın gelişmiş process_images fonksiyonunu kullan
-                    # Bu fonksiyon anyres, pad gibi farklı aspect ratio modlarını destekler
-                    image_tensors = process_images([pil], self.image_processor, self.model.config)
-                    if image_tensors is not None and len(image_tensors) > 0:
-                        image_tensors = image_tensors.to(self.model.device, dtype=torch.float16, non_blocking=True)
                 except Exception as e:
                     print(f"[warn] image processing failed: {e}")
                     image_tensors = None
-        # 3) tokenize (image token'ı gömülü)
-        input_ids = tokenizer_image_token(
-            prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
-        ).unsqueeze(0).to(self.model.device, non_blocking=True)  # unsqueeze ekledik
         # Input uzunluk kontrolü
         if input_ids.shape[-1] > self.context_len - 100:
-            # Prompt'u kısalt
             input_ids = input_ids[:, -(self.context_len - 200):]
-        # 4) güvenli max_new_tokens
         requested = int(params.get("max_new_tokens", MAX_NEW_TOKENS_DEF))
         avail = max(16, int(self.context_len) - int(input_ids.shape[-1]) - 8)
         max_new_tokens = max(1, min(requested, avail))
         gen_kwargs = {
             "input_ids": input_ids,
-            "images": image_tensors,
             "max_new_tokens": max_new_tokens,
             "temperature": float(params.get("temperature", 0.0)),
             "top_p": float(params.get("top_p", 1.0)),
@@ -189,6 +216,13 @@ class EndpointHandler:
             "pad_token_id": self.tokenizer.eos_token_id,
         }
         try:
             with torch.inference_mode():
                 output_ids = self.model.generate(**gen_kwargs)
@@ -202,5 +236,8 @@ class EndpointHandler:
         except Exception as e:
             print(f"Generation error: {e}")
             text = f"Error during generation: {str(e)}"
         return [{"generated_text": text}]

 from llava.conversation import conv_templates
 from llava.utils import disable_torch_init
 # Varsayılanlar
 DEFAULT_CONV_MODE  = os.getenv("LLAVA_CONV_MODE", "llava_v1")
 MAX_NEW_TOKENS_DEF = int(os.getenv("MAX_NEW_TOKENS", "1024"))
 class EndpointHandler:
     """
         # Görsel token işaretleri (LLaVA config)
         self.use_im_start_end = getattr(self.model.config, "mm_use_im_start_end", False)
     # ---- yardımcılar ----
     def _load_image(self, img_field: str) -> Optional[Image.Image]:
                 return Image.open(io.BytesIO(r.content)).convert("RGB")
             return Image.open(img_field).convert("RGB")
         except Exception as e:
             print(f"[warn] image load failed: {e}")
             return None
+    def _build_prompt(self, user_text: str, conv_mode: str, has_image: bool = False) -> str:
+        """Prompt oluştur - görüntü olup olmadığına göre"""
         if conv_mode not in conv_templates:
             conv_mode = DEFAULT_CONV_MODE
         conv = conv_templates[conv_mode].copy()
+        # Sadece görüntü varsa image token'ları ekle
+        if has_image:
+            if self.use_im_start_end:
+                content = f"{DEFAULT_IM_START_TOKEN}{DEFAULT_IMAGE_TOKEN}{DEFAULT_IM_END_TOKEN}\n{user_text}"
+            else:
+                content = f"{DEFAULT_IMAGE_TOKEN}\n{user_text}"
         else:
+            # Görüntü yoksa sadece text
+            content = user_text
         conv.append_message(conv.roles[0], content)
         conv.append_message(conv.roles[1], None)
         query_text = inputs.get("query", "") or inputs.get("text", "") or inputs.get("prompt", "")
         image_f = inputs.get("image") or inputs.get("image_url") or inputs.get("image_base64")
+        # 1) Görüntü işleme (önce)
         image_tensors = None
+        has_image = False
         if image_f:
             pil = self._load_image(image_f)
             if pil is not None:
                 try:
+                    # LLaVA'nın process_images fonksiyonunu kullan
+                    processed_images = process_images([pil], self.image_processor, self.model.config)
+                    if processed_images is not None:
+                        # Tensor formatını kontrol et ve düzelt
+                        if isinstance(processed_images, list):
+                            if len(processed_images) > 0:
+                                image_tensors = torch.stack(processed_images, dim=0)
+                            else:
+                                image_tensors = None
+                        else:
+                            image_tensors = processed_images
+                        if image_tensors is not None:
+                            image_tensors = image_tensors.to(
+                                self.model.device,
+                                dtype=torch.float16,
+                                non_blocking=True
+                            )
+                            has_image = True
+                            print(f"[info] Image processed successfully, shape: {image_tensors.shape}")
+                        else:
+                            print("[warn] Image processing returned None")
                 except Exception as e:
                     print(f"[warn] image processing failed: {e}")
                     image_tensors = None
+                    has_image = False
+        # 2) Prompt oluştur (görüntü durumuna göre)
+        prompt = self._build_prompt(query_text, conv_mode, has_image)
+        print(f"[debug] Generated prompt: {repr(prompt[:200])}")
+        # 3) Tokenize
+        if has_image:
+            # Görüntü varsa IMAGE_TOKEN_INDEX ile tokenize et
+            input_ids = tokenizer_image_token(
+                prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+            )
+        else:
+            # Görüntü yoksa normal tokenize
+            input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids[0]
+        # Batch dimension ekle
+        input_ids = input_ids.unsqueeze(0).to(self.model.device, non_blocking=True)
         # Input uzunluk kontrolü
         if input_ids.shape[-1] > self.context_len - 100:
             input_ids = input_ids[:, -(self.context_len - 200):]
+        # 4) Generation parameters
         requested = int(params.get("max_new_tokens", MAX_NEW_TOKENS_DEF))
         avail = max(16, int(self.context_len) - int(input_ids.shape[-1]) - 8)
         max_new_tokens = max(1, min(requested, avail))
         gen_kwargs = {
             "input_ids": input_ids,
             "max_new_tokens": max_new_tokens,
             "temperature": float(params.get("temperature", 0.0)),
             "top_p": float(params.get("top_p", 1.0)),
             "pad_token_id": self.tokenizer.eos_token_id,
         }
+        # Görüntü varsa images parametresini ekle
+        if has_image and image_tensors is not None:
+            gen_kwargs["images"] = image_tensors
+            print(f"[info] Using images in generation, shape: {image_tensors.shape}")
+        else:
+            print("[info] No images in generation")
         try:
             with torch.inference_mode():
                 output_ids = self.model.generate(**gen_kwargs)
         except Exception as e:
             print(f"Generation error: {e}")
+            import traceback
+            traceback.print_exc()
             text = f"Error during generation: {str(e)}"
         return [{"generated_text": text}]