CanerDedeoglu
/

Rapid_ECG

@@ -1,3 +1,4 @@
 # -*- coding: utf-8 -*-
 import os, io, sys, subprocess, base64
 from typing import Any, Dict, List, Optional
@@ -121,9 +122,11 @@ class EndpointHandler:
         # Sadece görüntü varsa image token'ları ekle
         if has_image:
             if self.use_im_start_end:
-                content = f"{DEFAULT_IM_START_TOKEN}{DEFAULT_IMAGE_TOKEN}{DEFAULT_IM_END_TOKEN}\n{user_text}"
             else:
-                content = f"{DEFAULT_IMAGE_TOKEN}\n{user_text}"
         else:
             # Görüntü yoksa sadece text
             content = user_text
@@ -163,7 +166,7 @@ class EndpointHandler:
                         else:
                             image_tensors = processed_images
-                        if image_tensors is not None:
                             image_tensors = image_tensors.to(
                                 self.model.device,
                                 dtype=torch.float16,
@@ -172,29 +175,62 @@ class EndpointHandler:
                             has_image = True
                             print(f"[info] Image processed successfully, shape: {image_tensors.shape}")
                         else:
-                            print("[warn] Image processing returned None")
                 except Exception as e:
                     print(f"[warn] image processing failed: {e}")
                     image_tensors = None
                     has_image = False
         # 2) Prompt oluştur (görüntü durumuna göre)
         prompt = self._build_prompt(query_text, conv_mode, has_image)
         print(f"[debug] Generated prompt: {repr(prompt[:200])}")
-        # 3) Tokenize
-        if has_image:
-            # Görüntü varsa IMAGE_TOKEN_INDEX ile tokenize et
-            input_ids = tokenizer_image_token(
-                prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
-            )
-        else:
-            # Görüntü yoksa normal tokenize
-            input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids[0]
-        # Batch dimension ekle
-        input_ids = input_ids.unsqueeze(0).to(self.model.device, non_blocking=True)
         # Input uzunluk kontrolü
         if input_ids.shape[-1] > self.context_len - 100:
@@ -216,12 +252,12 @@ class EndpointHandler:
             "pad_token_id": self.tokenizer.eos_token_id,
         }
-        # Görüntü varsa images parametresini ekle
-        if has_image and image_tensors is not None:
             gen_kwargs["images"] = image_tensors
             print(f"[info] Using images in generation, shape: {image_tensors.shape}")
         else:
-            print("[info] No images in generation")
         try:
             with torch.inference_mode():

 # -*- coding: utf-8 -*-
 import os, io, sys, subprocess, base64
 from typing import Any, Dict, List, Optional
         # Sadece görüntü varsa image token'ları ekle
         if has_image:
             if self.use_im_start_end:
+                # <image> tag'ini kullan - tokenizer_image_token bunu arar
+                content = f"{DEFAULT_IM_START_TOKEN}<image>{DEFAULT_IM_END_TOKEN}\n{user_text}"
             else:
+                # <image> tag'ini kullan - tokenizer_image_token bunu arar
+                content = f"<image>\n{user_text}"
         else:
             # Görüntü yoksa sadece text
             content = user_text
                         else:
                             image_tensors = processed_images
+                        if image_tensors is not None and image_tensors.numel() > 0:
                             image_tensors = image_tensors.to(
                                 self.model.device,
                                 dtype=torch.float16,
                             has_image = True
                             print(f"[info] Image processed successfully, shape: {image_tensors.shape}")
                         else:
+                            print("[warn] Image processing returned empty tensor")
+                            image_tensors = None
+                            has_image = False
                 except Exception as e:
                     print(f"[warn] image processing failed: {e}")
+                    import traceback
+                    traceback.print_exc()
                     image_tensors = None
                     has_image = False
         # 2) Prompt oluştur (görüntü durumuna göre)
         prompt = self._build_prompt(query_text, conv_mode, has_image)
         print(f"[debug] Generated prompt: {repr(prompt[:200])}")
+        print(f"[debug] Has image: {has_image}")
+        # 3) Tokenize - CRITICAL: <image> tag kontrolü
+        try:
+            if has_image and image_tensors is not None:
+                # Görüntü varsa IMAGE_TOKEN_INDEX ile tokenize et
+                # tokenizer_image_token fonksiyonu <image> tag'ini arar
+                input_ids = tokenizer_image_token(
+                    prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+                )
+                if input_ids.dim() == 1:
+                    input_ids = input_ids.unsqueeze(0)
+                # <image> tag'inin prompt'ta olduğunu kontrol et
+                if '<image>' not in prompt:
+                    print("[warn] <image> tag not found in prompt, switching to text-only mode")
+                    has_image = False
+                    image_tensors = None
+                    input_ids = self.tokenizer(user_text, return_tensors="pt").input_ids
+                elif IMAGE_TOKEN_INDEX not in input_ids:
+                    print(f"[warn] IMAGE_TOKEN_INDEX ({IMAGE_TOKEN_INDEX}) not found in input_ids after tokenization")
+                    print(f"[debug] input_ids unique values: {torch.unique(input_ids)[:10]}...")  # İlk 10 unique value
+                    # Fallback: Normal tokenization
+                    has_image = False
+                    image_tensors = None
+                    input_ids = self.tokenizer(user_text, return_tensors="pt").input_ids
+                else:
+                    print(f"[info] Successfully tokenized with IMAGE_TOKEN_INDEX: {IMAGE_TOKEN_INDEX}")
+            else:
+                # Görüntü yoksa normal tokenize - sadece user_text kullan
+                input_ids = self.tokenizer(user_text, return_tensors="pt").input_ids
+            input_ids = input_ids.to(self.model.device, non_blocking=True)
+            print(f"[debug] input_ids shape: {input_ids.shape}")
+        except Exception as e:
+            print(f"[error] Tokenization failed: {e}")
+            # Fallback to text-only mode
+            has_image = False
+            image_tensors = None
+            input_ids = self.tokenizer(user_text, return_tensors="pt").input_ids
+            input_ids = input_ids.to(self.model.device, non_blocking=True)
         # Input uzunluk kontrolü
         if input_ids.shape[-1] > self.context_len - 100:
             "pad_token_id": self.tokenizer.eos_token_id,
         }
+        # CRITICAL: Sadece gerçekten geçerli görüntü tensors varsa ekle
+        if has_image and image_tensors is not None and IMAGE_TOKEN_INDEX in input_ids:
             gen_kwargs["images"] = image_tensors
             print(f"[info] Using images in generation, shape: {image_tensors.shape}")
         else:
+            print("[info] No images in generation - text-only mode")
         try:
             with torch.inference_mode():