Files changed (1)
  1. handler.py +318 -524
handler.py CHANGED
@@ -1,542 +1,336 @@
- """
- PULSE-7B Enhanced Handler
- Ubden® Team - Edited by https://github.com/ck-cankurt
- Support: Text, Image URLs, and Base64 encoded images
- """
-
- import torch
- from typing import Dict, List, Any
  import base64
- from io import BytesIO
  from PIL import Image
  import requests
- import time
-
- # Import utilities if available
- try:
-     from utils import (
-         performance_monitor,
-         validate_image_input,
-         sanitize_parameters,
-         get_system_info,
-         create_health_check,
-         deepseek_client
-     )
-     UTILS_AVAILABLE = True
- except ImportError:
-     UTILS_AVAILABLE = False
-     deepseek_client = None
-     print("⚠️ Utils module not found - performance monitoring and DeepSeek integration disabled")
-
- # Try to import LLaVA modules for proper conversation handling
- try:
-     from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
-     from llava.conversation import conv_templates, SeparatorStyle
-     from llava.mm_utils import tokenizer_image_token, process_images, KeywordsStoppingCriteria
-     LLAVA_AVAILABLE = True
-     print("✅ LLaVA modules imported successfully")
- except ImportError:
-     LLAVA_AVAILABLE = False
-     print("⚠️ LLaVA modules not available - using basic text processing")
-
-
  class EndpointHandler:
-     def __init__(self, path=""):
-         """
-         Hey there! Let's get this PULSE-7B model up and running.
-         We'll load it from the HuggingFace hub directly, so no worries about local files.
-
-         Args:
-             path: Model directory path (we actually ignore this and load from HF hub)
-         """
-         print("🚀 Starting up PULSE-7B handler...")
-         print("📝 Enhanced by Ubden® Team - github.com/ck-cankurt")
-         import sys
-         print(f"🔧 Python version: {sys.version}")
-         print(f"🔧 PyTorch version: {torch.__version__}")
-
-         # Check transformers version
          try:
              import transformers
-             print(f"🔧 Transformers version: {transformers.__version__}")
-
-             # PULSE LLaVA works with transformers==4.37.2
-             if transformers.__version__ == "4.37.2":
-                 print("✅ Using PULSE LLaVA compatible version (4.37.2)")
-             elif "dev" in transformers.__version__ or "git" in str(transformers.__version__):
-                 print("⚠️ Using development version - may conflict with PULSE LLaVA")
-             else:
-                 print("⚠️ Using different version - PULSE LLaVA prefers 4.37.2")
          except Exception as e:
-             print(f" Error checking transformers version: {e}")
-
-         print(f"🔧 CUDA available: {torch.cuda.is_available()}")
-         if torch.cuda.is_available():
-             print(f"🔧 CUDA device: {torch.cuda.get_device_name(0)}")
-
-         # Let's see what hardware we're working with
-         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-         print(f"🖥️ Running on: {self.device}")
-
          try:
-             # First attempt - PULSE demo's exact approach
-             if LLAVA_AVAILABLE:
-                 print("📦 Using PULSE demo's load_pretrained_model approach...")
-                 from llava.model.builder import load_pretrained_model
-                 from llava.mm_utils import get_model_name_from_path
-
-                 model_path = "PULSE-ECG/PULSE-7B"
-                 model_name = get_model_name_from_path(model_path)
-
-                 self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
-                     model_path=model_path,
-                     model_base=None,
-                     model_name=model_name,
-                     load_8bit=False,
-                     load_4bit=False
-                 )
-
-                 # Move model to device like demo
-                 self.model = self.model.to(self.device)
-                 self.use_pipeline = False
-                 print("✅ Model loaded successfully with PULSE demo's approach!")
-                 print(f"📸 Image processor: {type(self.image_processor).__name__}")
-
-             else:
-                 raise ImportError("LLaVA modules not available")
-
          except Exception as e:
-             print(f"⚠️ PULSE demo approach failed: {e}")
-             print("🔄 Falling back to pipeline...")
-
              try:
-                 # Fallback - using pipeline
-                 from transformers import pipeline
-
-                 print("📦 Fetching model from HuggingFace Hub...")
-                 self.pipe = pipeline(
-                     "text-generation",
-                     model="PULSE-ECG/PULSE-7B",
-                     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                     device=0 if torch.cuda.is_available() else -1,
-                     trust_remote_code=True,
-                     model_kwargs={
-                         "low_cpu_mem_usage": True,
-                         "use_safetensors": True
-                     }
-                 )
-                 self.use_pipeline = True
-                 self.image_processor = None
-                 print("✅ Model loaded successfully via pipeline!")
-
              except Exception as e2:
-                 print(f"😓 Pipeline also failed: {e2}")
-
-                 try:
-                     # Last resort - manual loading
-                     from transformers import AutoTokenizer, LlamaForCausalLM
-
-                     print("📖 Manual loading as last resort...")
-                     self.tokenizer = AutoTokenizer.from_pretrained(
-                         "PULSE-ECG/PULSE-7B",
-                         trust_remote_code=True
-                     )
-
-                     self.model = LlamaForCausalLM.from_pretrained(
-                         "PULSE-ECG/PULSE-7B",
-                         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                         device_map="auto",
-                         low_cpu_mem_usage=True,
-                         trust_remote_code=True
-                     )
-
-                     if self.tokenizer.pad_token is None:
-                         self.tokenizer.pad_token = self.tokenizer.eos_token
-                         self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
-
-                     self.model.eval()
-                     self.use_pipeline = False
-                     self.image_processor = None
-                     print("✅ Model loaded manually!")
-
-                 except Exception as e3:
-                     print(f"😓 All approaches failed: {e3}")
-                     self.pipe = None
-                     self.model = None
-                     self.tokenizer = None
-                     self.image_processor = None
-                     self.use_pipeline = None
-
-         # Final status report
-         print("\n🔍 Model Loading Status Report:")
-         print(f" - use_pipeline: {self.use_pipeline}")
-         print(f" - model: {'✅ Loaded' if hasattr(self, 'model') and self.model is not None else '❌ None'}")
-         print(f" - tokenizer: {'✅ Loaded' if hasattr(self, 'tokenizer') and self.tokenizer is not None else '❌ None'}")
-         print(f" - image_processor: {'✅ Loaded' if hasattr(self, 'image_processor') and self.image_processor is not None else '❌ None'}")
-         print(f" - pipe: {'✅ Loaded' if hasattr(self, 'pipe') and self.pipe is not None else '❌ None'}")
-
-         # Check if any model component loaded successfully
-         has_model = hasattr(self, 'model') and self.model is not None
-         has_tokenizer = hasattr(self, 'tokenizer') and self.tokenizer is not None
-         has_pipe = hasattr(self, 'pipe') and self.pipe is not None
-         has_image_processor = hasattr(self, 'image_processor') and self.image_processor is not None
-
-         if not (has_model or has_tokenizer or has_pipe):
-             print("💥 CRITICAL: No model components loaded successfully!")
-         else:
-             print("✅ At least one model component loaded successfully")
-             if has_image_processor:
-                 print("🖼️ Vision capabilities available!")
-             else:
-                 print("⚠️ No image processor - text-only mode")
-
-     def is_valid_image_format(self, filename_or_url):
-         """Validate image format like PULSE demo"""
-         # Demo's supported formats
-         image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"]
-
-         if filename_or_url.startswith(('http://', 'https://')):
-             # For URLs, check the extension or content-type
-             ext = filename_or_url.split('.')[-1].split('?')[0].lower()
-             return ext in image_extensions
-         else:
-             # For base64 or local files
-             return True  # Base64 will be validated during decode
-
-     def process_image_input(self, image_input):
-         """
-         Handle both URL and base64 image inputs exactly like PULSE demo
-
-         Args:
-             image_input: Can be a URL string or base64 encoded image
-
-         Returns:
-             PIL Image object or None if something goes wrong
-         """
          try:
-             # Check if it's a URL (starts with http/https)
-             if isinstance(image_input, str) and (image_input.startswith('http://') or image_input.startswith('https://')):
-                 print(f"🌐 Fetching image from URL: {image_input[:50]}...")
-
-                 # Validate format like demo
-                 if not self.is_valid_image_format(image_input):
-                     print("❌ Invalid image format in URL")
-                     return None
-
-                 # Demo's exact image loading approach
-                 response = requests.get(image_input, timeout=15)
-                 if response.status_code == 200:
-                     image = Image.open(BytesIO(response.content)).convert("RGB")
-                     print(f" Image downloaded successfully! Size: {image.size}")
-                     return image
-                 else:
-                     print(f"❌ Failed to load image: status {response.status_code}")
-                     return None
-
-             # Must be base64 then
-             elif isinstance(image_input, str):
-                 print("🔍 Decoding base64 image...")
-
-                 # Remove the data URL prefix if it exists
-                 base64_data = image_input
-                 if "base64," in image_input:
-                     base64_data = image_input.split("base64,")[1]
-
-                 # Clean and validate base64 data
-                 base64_data = base64_data.strip().replace('\n', '').replace('\r', '').replace(' ', '')
-
-                 try:
-                     image_data = base64.b64decode(base64_data)
-                     image = Image.open(BytesIO(image_data)).convert('RGB')
-                     print(f"✅ Base64 image decoded successfully! Size: {image.size}")
-                     return image
-                 except Exception as decode_error:
-                     print(f"❌ Base64 decode error: {decode_error}")
-                     return None
-
          except Exception as e:
-             print(f" Couldn't process the image: {e}")
-             return None
-
-         return None
-
-     def add_turkish_commentary(self, response: Dict[str, Any], enable_commentary: bool, timeout: int = 30) -> Dict[str, Any]:
-         """Add Turkish commentary to the response using DeepSeek API"""
-         if not enable_commentary:
-             return response
-
-         if not UTILS_AVAILABLE or not deepseek_client:
-             print("⚠️ DeepSeek client not available - skipping Turkish commentary")
-             response["commentary_status"] = "unavailable"
-             return response
-
-         if not deepseek_client.is_available():
-             print("⚠️ DeepSeek API key not configured - skipping Turkish commentary")
-             response["commentary_status"] = "api_key_missing"
-             return response
-
-         generated_text = response.get("generated_text", "")
-         if not generated_text:
-             print("⚠️ No generated text to comment on")
-             response["commentary_status"] = "no_text"
-             return response
-
-         print("🔄 DeepSeek ile Türkçe yorum ekleniyor...")
-         commentary_result = deepseek_client.get_turkish_commentary(generated_text, timeout)
-
-         if commentary_result["success"]:
-             response["comment_text"] = commentary_result["comment_text"]
-             response["commentary_model"] = commentary_result.get("model", "deepseek-chat")
-             response["commentary_tokens"] = commentary_result.get("tokens_used", 0)
-             response["commentary_status"] = "success"
-             print("✅ Türkçe yorum başarıyla eklendi")
-         else:
-             response["comment_text"] = ""
-             response["commentary_error"] = commentary_result["error"]
-             response["commentary_status"] = "failed"
-             print(f"❌ Türkçe yorum eklenemedi: {commentary_result['error']}")
-
-         return response
-
-     def health_check(self) -> Dict[str, Any]:
-         """Health check endpoint"""
-         if UTILS_AVAILABLE:
-             return create_health_check()
-         else:
-             return {
-                 'status': 'healthy',
-                 'model': 'PULSE-7B',
-                 'timestamp': time.time(),
-                 'handler_version': '2.0.0'
-             }
-
-     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-         """
-         Main processing function - where the magic happens!
-
-         Args:
-             data: Input data with 'inputs' and optional 'parameters'
-
-         Returns:
-             List with the generated response
-         """
-         # Quick check - is our model ready?
-         if self.use_pipeline is None:
-             return [{
-                 "generated_text": "Oops! Model couldn't load properly. Please check the deployment settings.",
-                 "error": "Model initialization failed",
-                 "handler": "Ubden® Team Enhanced Handler"
-             }]
-
          try:
-             # Parse the inputs - flexible format support
-             inputs = data.get("inputs", "")
-             text = ""
-             image = None
-
-             if isinstance(inputs, dict):
-                 # Dictionary input - check for text and image
-                 # Support query field (new) plus original text/prompt fields
-                 text = inputs.get("query", inputs.get("text", inputs.get("prompt", str(inputs))))
-
-                 # Check for image in various formats
-                 image_input = inputs.get("image", inputs.get("image_url", inputs.get("image_base64", None)))
-                 if image_input:
-                     image = self.process_image_input(image_input)
-                     if image:
-                         # Since we're in text-only mode, create smart ECG context
-                         print(f"🖼️ Image loaded: {image.size[0]}x{image.size[1]} pixels - using text-only ECG analysis mode")
-
-                         # Create ECG-specific prompt that mimics visual analysis
-                         ecg_context = f"Analyzing an ECG image ({image.size[0]}x{image.size[1]} pixels). "
-
-                         # Use demo's exact approach - no additional context, just the query
-                         # Model is trained to understand ECG images from text queries
-                         pass  # Keep text exactly as received
-             else:
-                 # Simple string input
-                 text = str(inputs)
-
-             if not text:
-                 return [{"generated_text": "Hey, I need some text to work with! Please provide an input."}]
-
-             # Get generation parameters - using PULSE-7B demo's exact settings
-             parameters = data.get("parameters", {})
-             max_new_tokens = min(parameters.get("max_new_tokens", 1024), 8192)  # Demo uses 1024 default
-             temperature = parameters.get("temperature", 0.05)  # Demo uses 0.05 for precise medical analysis
-             top_p = parameters.get("top_p", 1.0)  # Demo uses 1.0 for full vocabulary access
-             do_sample = parameters.get("do_sample", True)  # Demo uses sampling
-             repetition_penalty = parameters.get("repetition_penalty", 1.0)  # Demo default
-
-             print(f"🎛️ Generation params: max_tokens={max_new_tokens}, temp={temperature}, top_p={top_p}, do_sample={do_sample}, rep_penalty={repetition_penalty}")
-
-             # Check if Turkish commentary is requested (NEW FEATURE)
-             enable_turkish_commentary = parameters.get("enable_turkish_commentary", False)  # Default false
-
-             # Using pipeline? Let's go!
-             if self.use_pipeline:
-                 print(f"🎛️ Pipeline generation: temp={temperature}, tokens={max_new_tokens}")
-                 print(f"📝 Input text: '{text[:100]}...'")
-
-                 result = self.pipe(
-                     text,
-                     max_new_tokens=max_new_tokens,
-                     min_new_tokens=200,  # Force very detailed analysis to match demo
-                     temperature=temperature,
-                     top_p=top_p,
-                     do_sample=do_sample,
-                     repetition_penalty=repetition_penalty,
-                     return_full_text=False  # Just the new stuff, not the input
-                 )
-
-                 # Pipeline returns a list, let's handle it
-                 if isinstance(result, list) and len(result) > 0:
-                     generated_text = result[0].get("generated_text", "").strip()
-
-                     print(f"🔍 Pipeline debug:")
-                     print(f" - Raw result: '{str(result[0])[:200]}...'")
-                     print(f" - Generated text length: {len(generated_text)}")
-
-                     # Clean up common issues
-                     if generated_text.startswith(text):
-                         generated_text = generated_text[len(text):].strip()
-                         print("🔧 Removed input text from output")
-
-                     # Remove common artifacts
-                     generated_text = generated_text.replace("</s>", "").strip()
-
-                     if not generated_text:
-                         print("❌ Pipeline generated empty text!")
-                         generated_text = "Empty response from pipeline. Please try different parameters."
-
-                     print(f"✅ Final pipeline text: '{generated_text[:100]}...' (length: {len(generated_text)})")
-
-                     # Create response
-                     response = {"generated_text": generated_text}
-
-                     # Add Turkish commentary if requested (NEW FEATURE)
-                     if enable_turkish_commentary:
-                         response = self.add_turkish_commentary(response, True)
-
-                     return [response]
-                 else:
-                     generated_text = str(result).strip()
-
-                     # Create response
-                     response = {"generated_text": generated_text}
-
-                     # Add Turkish commentary if requested (NEW FEATURE)
-                     if enable_turkish_commentary:
-                         response = self.add_turkish_commentary(response, True)
-
-                     return [response]
-
-             # Manual generation mode - using PULSE demo's exact approach
-             else:
-                 print(f"🔥 Manual generation with PULSE demo logic: temp={temperature}, tokens={max_new_tokens}")
-                 print(f"📝 Input text: '{text[:100]}...'")
-
-
-                 # Text-only generation with enhanced ECG context
-                 print("🔤 Using enhanced text-only generation with ECG context")
-
-                 # Tokenize the enhanced prompt
-                 encoded = self.tokenizer(
-                     text,
-                     return_tensors="pt",
-                     truncation=True,
-                     max_length=4096  # Increased for longer prompts
-                 )
-
-                 input_ids = encoded["input_ids"].to(self.device)
-                 attention_mask = encoded.get("attention_mask")
-                 if attention_mask is not None:
-                     attention_mask = attention_mask.to(self.device)
-
-                 print(f"🔍 Enhanced generation debug:")
-                 print(f" - Enhanced prompt length: {len(text)} chars")
-                 print(f" - Input tokens: {input_ids.shape[-1]}")
-                 print(f" - Prompt preview: '{text[:150]}...'")
-
-                 # Generate with enhanced settings for medical analysis
-                 with torch.no_grad():
-                     outputs = self.model.generate(
-                         input_ids,
-                         attention_mask=attention_mask,
-                         max_new_tokens=max_new_tokens,
-                         min_new_tokens=200,  # Force detailed response like demo
-                         temperature=temperature,
-                         top_p=top_p,
-                         do_sample=do_sample,
-                         repetition_penalty=repetition_penalty,
-                         pad_token_id=self.tokenizer.pad_token_id,
-                         eos_token_id=self.tokenizer.eos_token_id,
-                         early_stopping=False
-                     )
-
-                 # Decode and clean response
-                 generated_ids = outputs[0][input_ids.shape[-1]:]
-                 generated_text = self.tokenizer.decode(
-                     generated_ids,
-                     skip_special_tokens=True,
-                     clean_up_tokenization_spaces=True
-                 ).strip()
-
-                 # Aggressive cleanup of artifacts
-                 generated_text = generated_text.replace("</s>", "").strip()
-
-                 # Simple cleanup - just remove Answer prefix and parentheses
-                 if generated_text.startswith("(Answer:") and ")" in generated_text:
-                     # Just remove the parentheses and Answer: prefix
-                     end_paren = generated_text.find(")")
-                     answer_content = generated_text[8:end_paren].strip()  # Remove "(Answer:"
-                     # Keep the rest of the response if there is any
-                     rest_of_response = generated_text[end_paren+1:].strip()
-
-                     if rest_of_response:
-                         generated_text = f"{answer_content}. {rest_of_response}"
-                     else:
-                         generated_text = answer_content
-
-                 elif generated_text.startswith("Answer:"):
-                     generated_text = generated_text[7:].strip()
-
-                 # Remove only clear training artifacts
-                 cleanup_patterns = [
-                     "In this task",
-                     "I'm asking the respondent",
-                     "The respondent should"
-                 ]
-
-                 for pattern in cleanup_patterns:
-                     if pattern in generated_text:
-                         parts = generated_text.split(pattern)
-                         generated_text = parts[0].strip()
-                         break
-
-                 # Only provide fallback if response is truly empty or malformed
-                 if len(generated_text) < 10 or generated_text.startswith("7)"):
-                     print("⚠️ Malformed response detected, providing fallback...")
-                     generated_text = "This ECG shows cardiac electrical activity. For accurate interpretation, please consult with a qualified cardiologist who can analyze the specific waveforms, intervals, and morphology patterns."
-
-                 print(f"✅ Enhanced text-only generation: '{generated_text[:100]}...' (length: {len(generated_text)})")
-
-                 # Create response
-                 response = {"generated_text": generated_text}
-
-                 # Add Turkish commentary if requested (NEW FEATURE)
-                 if enable_turkish_commentary:
-                     response = self.add_turkish_commentary(response, True)
-
-                 return [response]
-
-
         except Exception as e:
-             error_msg = f"Something went wrong during generation: {str(e)}"
-             print(f"❌ {error_msg}")
-             return [{
-                 "generated_text": "",
-                 "error": error_msg,
-                 "handler": "Ubden® Team Enhanced Handler"
-             }]
+ # -*- coding: utf-8 -*-
+ # handler.py — Rapid_ECG / PULSE-7B startup-load, stable, DEBUG-instrumented version
+ # - The model is loaded as soon as the server starts (cold start happens only once)
+ # - Follows the HF Endpoint contract (EndpointHandler, load(), __call__)
+ # - Load order: local (HF_MODEL_DIR) → Hub (HF_MODEL_ID)
+ # - Images are processed only via .preprocess() (no process_images)
+ # - Vision tower check: mm_vision_tower or vision_tower
+ # - Uses IMAGE_TOKEN_INDEX and extensive [DEBUG] logging
+
+ import os
+ import io
+ import sys
  import base64
+ import subprocess
+ from typing import Any, Dict, Optional
+
+ import torch
  from PIL import Image
  import requests
+
+
+ # ===== Make sure the LLaVA library is available =====
+ def _ensure_llava(tag: str = "v1.2.0"):
+     try:
+         import llava  # noqa
+         print("[DEBUG] LLaVA already available.")
+         return
+     except ImportError:
+         print(f"[DEBUG] LLaVA not found; installing (tag={tag}) ...")
+         subprocess.check_call([
+             sys.executable, "-m", "pip", "install",
+             f"git+https://github.com/haotian-liu/LLaVA@{tag}#egg=llava"
+         ])
+         print("[DEBUG] LLaVA installed.")
+
+ _ensure_llava("v1.2.0")
+
+ # ===== LLaVA imports =====
+ from llava.conversation import conv_templates
+ from llava.constants import (
+     DEFAULT_IMAGE_TOKEN,
+     DEFAULT_IM_START_TOKEN,
+     DEFAULT_IM_END_TOKEN,
+     IMAGE_TOKEN_INDEX,
+ )
+ from llava.model.builder import load_pretrained_model
+ from llava.mm_utils import tokenizer_image_token, get_model_name_from_path
+
+
+ # ---------- helpers ----------
+ def _get_env(name: str, default: Optional[str] = None) -> Optional[str]:
+     v = os.getenv(name)
+     return v if v not in (None, "") else default
+
+ def _pick_device() -> torch.device:
+     if torch.cuda.is_available():
+         dev = torch.device("cuda")
+     elif torch.backends.mps.is_available():
+         dev = torch.device("mps")
+     else:
+         dev = torch.device("cpu")
+     print(f"[DEBUG] pick_device -> {dev}")
+     return dev
+
+ def _pick_dtype(device: torch.device):
+     if device.type == "cuda":
+         dt = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
+     else:
+         dt = torch.float32
+     print(f"[DEBUG] pick_dtype({device}) -> {dt}")
+     return dt
+
+ def _is_probably_base64(s: str) -> bool:
+     s = s.strip()
+     if s.startswith("data:image"):
+         return True
+     allowed = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r")
+     return len(s) % 4 == 0 and all(c in allowed for c in s)
+
+ def _load_image_from_any(image_input: Any) -> Image.Image:
+     print(f"[DEBUG] _load_image_from_any type={type(image_input)}")
+     if isinstance(image_input, Image.Image):
+         return image_input.convert("RGB")
+     if isinstance(image_input, (bytes, bytearray)):
+         return Image.open(io.BytesIO(image_input)).convert("RGB")
+     if hasattr(image_input, "read"):
+         return Image.open(image_input).convert("RGB")
+     if isinstance(image_input, str):
+         s = image_input.strip()
+         if s.startswith("data:image"):
+             try:
+                 _, b64 = s.split(",", 1)
+                 data = base64.b64decode(b64)
+                 return Image.open(io.BytesIO(data)).convert("RGB")
+             except Exception as e:
+                 raise ValueError(f"Bad data URL: {e}")
+         if _is_probably_base64(s) and not s.startswith(("http://", "https://")):
+             try:
+                 data = base64.b64decode(s)
+                 return Image.open(io.BytesIO(data)).convert("RGB")
+             except Exception as e:
+                 raise ValueError(f"Bad base64 image: {e}")
+         if s.startswith(("http://", "https://")):
+             resp = requests.get(s, timeout=20)
+             resp.raise_for_status()
+             return Image.open(io.BytesIO(resp.content)).convert("RGB")
+         # local path
+         return Image.open(s).convert("RGB")
+     raise ValueError(f"Unsupported image input type: {type(image_input)}")
+
+ def _get_conv_mode(model_name: str) -> str:
+     name = (model_name or "").lower()
+     if "llama-2" in name:
+         return "llava_llama_2"
+     if "mistral" in name:
+         return "mistral_instruct"
+     if "v1.6-34b" in name:
+         return "chatml_direct"
+     if "v1" in name or "pulse" in name:
+         return "llava_v1"
+     if "mpt" in name:
+         return "mpt"
+     return "llava_v0"
+
+ def _build_prompt_with_image(prompt: str, model_cfg) -> str:
+     # If the user already added an image token, don't add it again
+     if DEFAULT_IMAGE_TOKEN in prompt or DEFAULT_IM_START_TOKEN in prompt:
+         return prompt
+     if getattr(model_cfg, "mm_use_im_start_end", False):
+         token = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
+         return f"{token}\n{prompt}"
+     return f"{DEFAULT_IMAGE_TOKEN}\n{prompt}"
+
+ def _resolve_model_path(model_dir_hint: Optional[str], default_dir: str = "/repository") -> str:
+     # Priority: HF_MODEL_DIR (local) -> model_dir_hint from the constructor -> default_dir
+     p = _get_env("HF_MODEL_DIR") or model_dir_hint or default_dir
+     p = os.path.abspath(p)
+     print(f"[DEBUG] resolved model path: {p}")
+     return p
+
+
+ # ---------- Endpoint Handler ----------
  class EndpointHandler:
+     def __init__(self, model_dir: Optional[str] = None):
+         # DEBUG banner
+         print("🚀 Starting up PULSE-7B handler (startup load)...")
+         print("📝 Enhanced by Ubden® Team")
+         print(f"🔧 Python: {sys.version}")
+         print(f"🔧 PyTorch: {torch.__version__}")
          try:
              import transformers
+             print(f"🔧 Transformers: {transformers.__version__}")
          except Exception as e:
+             print(f"[DEBUG] transformers import failed: {e}")
+
+         self.model_dir = model_dir
+         self.device = _pick_device()
+         self.dtype = _pick_dtype(self.device)
+
+         # Environment settings (flash-attn hint; harmless if unsupported)
+         os.environ.setdefault("ATTN_IMPLEMENTATION", "flash_attention_2")
+         os.environ.setdefault("FLASH_ATTENTION", "1")
+         print(f"[DEBUG] ATTN_IMPLEMENTATION={os.getenv('ATTN_IMPLEMENTATION')} FLASH_ATTENTION={os.getenv('FLASH_ATTENTION')}")
+
+         # Model/tokenizer/image-processor containers
+         self.model = None
+         self.tokenizer = None
+         self.image_processor = None
+         self.context_len = None
+         self.model_name = None
+
+         # ---- Load the model here (at startup) ----
          try:
+             self._startup_load_model()
+             print("✅ Model loaded & ready in __init__")
          except Exception as e:
+             print(f"💥 CRITICAL: model startup load failed: {e}")
+             raise
+
+     def _startup_load_model(self):
+         # Use the local directory if it exists, otherwise fall back to the Hub
+         local_path = _resolve_model_path(self.model_dir)
+         use_local = os.path.isdir(local_path) and any(
+             os.path.exists(os.path.join(local_path, f))
+             for f in ("config.json", "tokenizer_config.json")
+         )
+         model_base = _get_env("HF_MODEL_BASE", None)
+
+         if use_local:
+             model_path = local_path
+             print(f"[DEBUG] loading model LOCALLY from: {model_path}")
+         else:
+             model_path = _get_env("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
+             print(f"[DEBUG] loading model from HUB: {model_path} (HF_MODEL_BASE={model_base})")
+
+         # ⬇️ FIX: the LLaVA v1.2.0 signature requires the model_name parameter
+         model_name = get_model_name_from_path(model_path)
+         print(f"[DEBUG] resolved model_name: {model_name}")
+
+         print("[DEBUG] calling load_pretrained_model ...")
+         self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
+             model_path=model_path,
+             model_base=model_base,
+             model_name=model_name,  # <-- required parameter
+             load_8bit=False,
+             load_4bit=False,
+             device_map="auto",
+             device=self.device,
+         )
+         self.model_name = getattr(self.model.config, "name_or_path", str(model_path))
+         print(f"[DEBUG] model loaded: name={self.model_name}")
+
+         # Vision tower check (new/old field names)
+         vt = (
+             getattr(self.model.config, "mm_vision_tower", None)
+             or getattr(self.model.config, "vision_tower", None)
+         )
+         print(f"[DEBUG] vision tower: {vt}")
+         if self.image_processor is None or vt is None:
+             raise RuntimeError(
+                 "[ERROR] Vision tower not loaded (mm_vision_tower/vision_tower None). "
+                 "For local loading, HF_MODEL_DIR must point to the correct folder; "
+                 "for the Hub, HF_MODEL_ID must be a PULSE/LLaVA-based model (e.g. 'PULSE-ECG/PULSE-7B')."
+             )
+
+         # Tokenizer safety
+         try:
+             self.tokenizer.padding_side = "left"
+             if getattr(self.tokenizer, "pad_token_id", None) is None:
+                 self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+         except Exception as e:
+             print(f"[DEBUG] tokenizer safety patch failed: {e}")
+
+         self.model.eval()
+
+     # No-op: the HF inference toolkit will still call load()
+     def load(self):
+         print("[DEBUG] load(): model is already initialized in __init__")
+         return True
+
+     @torch.inference_mode()
+     def __call__(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+         print(f"[DEBUG] __call__ inputs keys={list(inputs.keys()) if hasattr(inputs, 'keys') else 'N/A'}")
+         # Unwrap the HF {"inputs": {...}} envelope
+         if "inputs" in inputs and isinstance(inputs["inputs"], dict):
+             inputs = inputs["inputs"]
+
+         prompt = inputs.get("query") or inputs.get("prompt") or inputs.get("istem") or ""
+         image_in = inputs.get("image") or inputs.get("image_url") or inputs.get("img")
+         if not image_in:
+             return {"error": "Missing 'image' in payload"}
+         if not isinstance(prompt, str) or not prompt.strip():
+             return {"error": "Missing 'query'/'prompt' text"}
+
+         # Generation parameters
+         temperature = float(inputs.get("temperature", 0.0))
+         top_p = float(inputs.get("top_p", 0.9))
+         max_new = int(inputs.get("max_new_tokens", inputs.get("max_tokens", 512)))
+         repetition_penalty = float(inputs.get("repetition_penalty", 1.0))
+         conv_mode_override = inputs.get("conv_mode") or _get_env("CONV_MODE", None)
+
+         # ---- Load image + preprocess
+         try:
+             image = _load_image_from_any(image_in)
+             print(f"[DEBUG] loaded image size={image.size}")
+         except Exception as e:
+             return {"error": f"Failed to load image: {e}"}
+
+         if self.image_processor is None:
+             return {"error": "image_processor is None; model not initialized properly (no vision tower)"}
+
+         try:
+             out = self.image_processor.preprocess(image, return_tensors="pt")
+             images_tensor = out["pixel_values"].to(self.device, dtype=self.dtype)
+             image_sizes = [image.size]
+             print(f"[DEBUG] preprocess OK; images_tensor.shape={images_tensor.shape}")
+         except Exception as e:
+             return {"error": f"Image preprocessing failed: {e}"}
+
+         # ---- Conversation + prompt
+         mode = conv_mode_override or _get_conv_mode(self.model_name)
+         conv = (conv_templates.get(mode) or conv_templates[list(conv_templates.keys())[0]]).copy()
+         conv.append_message(conv.roles[0], _build_prompt_with_image(prompt.strip(), self.model.config))
+         conv.append_message(conv.roles[1], None)
+         full_prompt = conv.get_prompt()
+         print(f"[DEBUG] conv_mode={mode}; full_prompt_len={len(full_prompt)}")
+
+         # ---- Tokenization (with IMAGE_TOKEN_INDEX)
+         try:
+             input_ids = tokenizer_image_token(
+                 full_prompt, self.tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors="pt"
+             ).unsqueeze(0).to(self.device)
+             print(f"[DEBUG] tokenizer_image_token OK; input_ids.shape={input_ids.shape}")
+         except Exception as e:
+             print(f"[DEBUG] tokenizer_image_token failed: {e}; falling back to the plain tokenizer")
             try:
+                 toks = self.tokenizer([full_prompt], return_tensors="pt", padding=True, truncation=True)
+                 input_ids = toks["input_ids"].to(self.device)
+                 print(f"[DEBUG] plain tokenizer OK; input_ids.shape={input_ids.shape}")
             except Exception as e2:
+                 return {"error": f"Tokenization failed: {e} / {e2}"}
+
+         attention_mask = torch.ones_like(input_ids, device=self.device)
+
+         # ---- Generate
         try:
+             print(f"[DEBUG] generate(max_new_tokens={max_new}, temp={temperature}, top_p={top_p}, rep={repetition_penalty})")
+             gen_ids = self.model.generate(
+                 input_ids=input_ids,
+                 attention_mask=attention_mask,
+                 images=images_tensor,
+                 image_sizes=image_sizes,
+                 do_sample=(temperature > 0),
+                 temperature=temperature,
+                 top_p=top_p,
+                 max_new_tokens=max_new,
+                 repetition_penalty=repetition_penalty,
+                 use_cache=True,
+             )
+             print(f"[DEBUG] generate OK; gen_ids.shape={gen_ids.shape}")
         except Exception as e:
+             return {"error": f"Generation failed: {e}"}
+
+         # ---- Decode (only the newly generated tokens)
         try:
+             new_tokens = gen_ids[0, input_ids.shape[1]:]
+             text = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+             print(f"[DEBUG] decoded_text_len={len(text)}")
         except Exception as e:
+             return {"error": f"Decode failed: {e}"}
+
+         return {
+             "generated_text": text,
+             "model": self.model_name,
+             "conv_mode": mode,
+         }
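
Example request (reference only): a minimal client-side sketch of how the rewritten handler expects to be called. The endpoint URL and token below are placeholders, and the payload keys mirror what __call__ reads above (query, image, max_new_tokens, temperature); on success the handler returns generated_text, model, and conv_mode, on failure an error field.

# Minimal request sketch; ENDPOINT_URL, HF_TOKEN, and the image URL are hypothetical placeholders.
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_xxx"  # placeholder

payload = {
    "inputs": {
        "query": "Interpret this ECG.",
        "image": "https://example.com/ecg.png",  # URL, base64 string, or data URL
        "max_new_tokens": 512,
        "temperature": 0.0,
    }
}

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
    json=payload,
    timeout=120,
)
result = resp.json()
# The handler returns {"generated_text": ...} on success or {"error": ...} on failure.
print(result.get("generated_text") or result.get("error"))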