Files changed (1) hide show
  1. handler.py +82 -0
handler.py CHANGED
@@ -435,6 +435,88 @@ class EndpointHandler:
435
  else:
436
  print(f"🔥 Manual generation with PULSE demo logic: temp={temperature}, tokens={max_new_tokens}")
437
  print(f"📝 Input text: '{text[:100]}...'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  # Text-only generation with enhanced ECG context
440
  print("🔤 Using enhanced text-only generation with ECG context")
 
435
  else:
436
  print(f"🔥 Manual generation with PULSE demo logic: temp={temperature}, tokens={max_new_tokens}")
437
  print(f"📝 Input text: '{text[:100]}...'")
438
+
439
+ # ... add this right below the place where you parse inputs/text/image ...
440
+
441
+ use_multimodal = (
442
+ LLAVA_AVAILABLE
443
+ and hasattr(self, "model") and self.model is not None
444
+ and hasattr(self, "image_processor") and self.image_processor is not None
445
+ and image is not None
446
+ )
447
+
448
+ if use_multimodal:
449
+ try:
450
+ # 1) LLaVA prompt (conversation template)
451
+ from llava.constants import (
452
+ IMAGE_TOKEN_INDEX,
453
+ DEFAULT_IMAGE_TOKEN,
454
+ DEFAULT_IM_START_TOKEN,
455
+ DEFAULT_IM_END_TOKEN,
456
+ )
457
+ from llava.conversation import conv_templates
458
+
459
+ conv = conv_templates.get("llava_v1") or conv_templates[list(conv_templates.keys())[0]]
460
+ conv = conv.copy()
461
+ conv.append_message(conv.roles[0], text)
462
+ conv.append_message(conv.roles[1], None)
463
+ prompt = conv.get_prompt()
464
+
465
+ # 2) Prepend the <image> sentinel, wrapped in IM_START/IM_END if required
466
+ image_token = DEFAULT_IMAGE_TOKEN
467
+ if getattr(getattr(self.model, "config", object()), "mm_use_im_start_end", False):
468
+ image_token = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
469
+ prompt = image_token + "\n" + prompt
470
+
471
+ # 3) Convert the image to a tensor
472
+ imgs = process_images([image], self.image_processor, self.model.config)
473
+ model_device = next(self.model.parameters()).device
474
+ model_dtype = next(self.model.parameters()).dtype
475
+ if isinstance(imgs, list):
476
+ images_tensor = [im.to(model_device, dtype=model_dtype) for im in imgs]
477
+ else:
478
+ images_tensor = imgs.to(model_device, dtype=model_dtype)
479
+ image_sizes = [image.size]
480
+
481
+ # 4) Tokenize the prompt (special tokenizer that handles the image sentinel)
482
+ input_ids = tokenizer_image_token(
483
+ prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt'
484
+ ).unsqueeze(0).to(model_device)
485
+
486
+ # 5) Explicit attention mask (some versions require it)
487
+ attention_mask = torch.ones_like(input_ids, dtype=torch.long, device=model_device)
488
+
489
+ # 6) Generation — deterministic decoding is recommended for medical use
490
+ gen_kwargs = dict(
491
+ max_new_tokens=min(parameters.get("max_new_tokens", 512), 1024),
492
+ temperature=0.0,
493
+ top_p=1.0,
494
+ do_sample=False,
495
+ repetition_penalty=parameters.get("repetition_penalty", 1.0),
496
+ pad_token_id=self.tokenizer.pad_token_id,
497
+ eos_token_id=getattr(self.tokenizer, "eos_token_id", None),
498
+ )
499
+
500
+ out = self.model.generate(
501
+ inputs=input_ids,
502
+ attention_mask=attention_mask,
503
+ images=images_tensor,
504
+ image_sizes=image_sizes,
505
+ **gen_kwargs
506
+ )
507
+
508
+ # 7) Decode
509
+ new_tokens = out.shape[-1] - input_ids.shape[-1]
510
+ resp_ids = out[:, -new_tokens:] if new_tokens > 0 else out
511
+ generated_text = self.tokenizer.decode(resp_ids[0], skip_special_tokens=True).strip()
512
+
513
+ return [{"generated_text": generated_text, "mode": "multimodal"}]
514
+
515
+ except Exception as e:
516
+ print(f"[⚠️] Multimodal path failed → falling back to text-only: {e}")
517
+ # From here on, let the existing text-only path continue to run
518
+ # (do not return anything here; the text-only block below already exists)
519
+
520
 
521
  # Text-only generation with enhanced ECG context
522
  print("🔤 Using enhanced text-only generation with ECG context")