ubden
/

aimedlab-pulse-hf

@@ -435,6 +435,88 @@ class EndpointHandler:
             else:
                 print(f"🔥 Manual generation with PULSE demo logic: temp={temperature}, tokens={max_new_tokens}")
                 print(f"📝 Input text: '{text[:100]}...'")
                 # Text-only generation with enhanced ECG context
                 print("🔤 Using enhanced text-only generation with ECG context")

             else:
                 print(f"🔥 Manual generation with PULSE demo logic: temp={temperature}, tokens={max_new_tokens}")
                 print(f"📝 Input text: '{text[:100]}...'")
+                # ... insert immediately below the point where inputs/text/image are parsed ...
+                use_multimodal = (
+                    LLAVA_AVAILABLE
+                    and hasattr(self, "model") and self.model is not None
+                    and hasattr(self, "image_processor") and self.image_processor is not None
+                    and image is not None
+                )
+                if use_multimodal:
+                    try:
+                        # 1) Build the LLaVA prompt (conversation template)
+                        from llava.constants import (
+                            IMAGE_TOKEN_INDEX,
+                            DEFAULT_IMAGE_TOKEN,
+                            DEFAULT_IM_START_TOKEN,
+                            DEFAULT_IM_END_TOKEN,
+                        )
+                        from llava.conversation import conv_templates
+                        conv = conv_templates.get("llava_v1") or conv_templates[list(conv_templates.keys())[0]]
+                        conv = conv.copy()
+                        conv.append_message(conv.roles[0], text)
+                        conv.append_message(conv.roles[1], None)
+                        prompt = conv.get_prompt()
+                        # 2) Prepend the <image> sentinel, wrapped in IM_START/IM_END if required
+                        image_token = DEFAULT_IMAGE_TOKEN
+                        if getattr(getattr(self.model, "config", object()), "mm_use_im_start_end", False):
+                            image_token = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
+                        prompt = image_token + "\n" + prompt
+                        # 3) Convert the image to a tensor
+                        imgs = process_images([image], self.image_processor, self.model.config)
+                        model_device = next(self.model.parameters()).device
+                        model_dtype  = next(self.model.parameters()).dtype
+                        if isinstance(imgs, list):
+                            images_tensor = [im.to(model_device, dtype=model_dtype) for im in imgs]
+                        else:
+                            images_tensor = imgs.to(model_device, dtype=model_dtype)
+                        image_sizes = [image.size]
+                        # 4) Tokenize the prompt (special tokenizer that handles the image sentinel)
+                        input_ids = tokenizer_image_token(
+                            prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt'
+                        ).unsqueeze(0).to(model_device)
+                        # 5) Explicit attention mask (some versions require it)
+                        attention_mask = torch.ones_like(input_ids, dtype=torch.long, device=model_device)
+                        # 6) Generation — deterministic decoding is recommended for medical use
+                        gen_kwargs = dict(
+                            max_new_tokens=min(parameters.get("max_new_tokens", 512), 1024),
+                            temperature=0.0,
+                            top_p=1.0,
+                            do_sample=False,
+                            repetition_penalty=parameters.get("repetition_penalty", 1.0),
+                            pad_token_id=self.tokenizer.pad_token_id,
+                            eos_token_id=getattr(self.tokenizer, "eos_token_id", None),
+                        )
+                        out = self.model.generate(
+                            inputs=input_ids,
+                            attention_mask=attention_mask,
+                            images=images_tensor,
+                            image_sizes=image_sizes,
+                            **gen_kwargs
+                        )
+                        # 7) Decode
+                        new_tokens = out.shape[-1] - input_ids.shape[-1]
+                        resp_ids = out[:, -new_tokens:] if new_tokens > 0 else out
+                        generated_text = self.tokenizer.decode(resp_ids[0], skip_special_tokens=True).strip()
+                        return [{"generated_text": generated_text, "mode": "multimodal"}]
+                    except Exception as e:
+                        print(f"[⚠️] Multimodal path failed → falling back to text-only: {e}")
+                        # From here on, let the existing text-only path continue to run
+                        # (do not return anything; the text-only block below already exists)
                 # Text-only generation with enhanced ECG context
                 print("🔤 Using enhanced text-only generation with ECG context")