CanerDedeoglu committed on
Commit
56c38a0
·
verified ·
1 Parent(s): 05ae2ff

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +110 -70
handler.py CHANGED
@@ -1,10 +1,12 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- PULSE ECG Handler (demo-like streaming)
4
- - TextIteratorStreamer + skip_prompt=True (dilimleme yok; Step 1 korunur)
5
- - do_sample=True (demo davranışı), temperature/top_p payload'dan
6
- - Opsiyonel: no_stop, custom_stop, no_repeat_ngram_size, min_new_tokens
7
- - IM_START/END otomatik; 3D/4D/5D görüntü tensörü uyumlu; device/dtype eşleştirme
 
 
8
  """
9
 
10
  import os
@@ -14,13 +16,13 @@ import hashlib
14
  import datetime
15
  from io import BytesIO
16
  from threading import Thread
17
- from typing import Optional, List
18
 
19
  import torch
20
  from PIL import Image
21
  import requests
22
 
23
- # --- LLaVA / Transformers ---
24
  try:
25
  from llava.constants import (
26
  IMAGE_TOKEN_INDEX,
@@ -34,7 +36,6 @@ try:
34
  tokenizer_image_token,
35
  process_images,
36
  get_model_name_from_path,
37
- KeywordsStoppingCriteria,
38
  )
39
  from llava.utils import disable_torch_init
40
  LLAVA_AVAILABLE = True
@@ -49,7 +50,7 @@ except Exception as e:
49
  TRANSFORMERS_AVAILABLE = False
50
  print(f"[WARN] transformers not available: {e}")
51
 
52
- # --- HF Hub (opsiyonel logging) ---
53
  try:
54
  from huggingface_hub import HfApi, login
55
  HF_HUB_AVAILABLE = True
@@ -71,7 +72,7 @@ if HF_HUB_AVAILABLE and "HF_TOKEN" in os.environ:
71
  LOGDIR = "./logs"
72
  os.makedirs(LOGDIR, exist_ok=True)
73
 
74
- # --- Global Model State ---
75
  tokenizer = None
76
  model = None
77
  image_processor = None
@@ -79,7 +80,8 @@ context_len = None
79
  args = None
80
  model_initialized = False
81
 
82
- # ----------------- Utilities -----------------
 
83
 
84
  def _safe_upload(path: str):
85
  if api and repo_name and path and os.path.isfile(path):
@@ -93,18 +95,19 @@ def _safe_upload(path: str):
93
  except Exception as e:
94
  print(f"[upload] failed for {path}: {e}")
95
 
96
- def _conv_log_path():
97
  t = datetime.datetime.now()
98
  p = os.path.join(LOGDIR, f"{t.year:04d}-{t.month:02d}-{t.day:02d}-user_conv.json")
99
  os.makedirs(os.path.dirname(p), exist_ok=True)
100
  return p
101
 
102
- def load_image_any(image_input):
103
  """
104
  Desteklenen:
105
  - URL (http/https)
106
  - Yerel dosya yolu
107
  - base64 (opsiyonel data URL prefix ile)
 
108
  """
109
  if isinstance(image_input, str):
110
  s = image_input.strip()
@@ -114,15 +117,16 @@ def load_image_any(image_input):
114
  return Image.open(BytesIO(r.content)).convert("RGB")
115
  if os.path.exists(s):
116
  return Image.open(s).convert("RGB")
117
- # base64
118
  if s.startswith("data:image"):
119
  s = s.split(",", 1)[1]
120
  raw = base64.b64decode(s)
121
  return Image.open(BytesIO(raw)).convert("RGB")
122
- elif isinstance(image_input, dict) and "image" in image_input:
 
123
  return load_image_any(image_input["image"])
124
- else:
125
- raise ValueError("Unsupported image input format")
126
 
127
  def _guess_conv_mode(model_path: str) -> str:
128
  name = get_model_name_from_path(model_path).lower()
@@ -139,6 +143,7 @@ def _wrap_image_token_if_needed(model_cfg) -> bool:
139
  return False
140
 
141
  def _build_prompt_and_ids(chatbot, user_text: str, device: torch.device):
 
142
  use_wrap = _wrap_image_token_if_needed(chatbot.model.config)
143
  if use_wrap:
144
  # <im_start><image><im_end>\n + user text
@@ -155,50 +160,50 @@ def _build_prompt_and_ids(chatbot, user_text: str, device: torch.device):
155
  ).unsqueeze(0).to(device)
156
  return prompt, input_ids
157
 
158
- def _stopping_keywords(chatbot, input_ids, extra: Optional[List[str]] = None):
159
- conv = chatbot.conversation
160
- stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
161
- keys = [stop_str]
162
- if extra:
163
- keys.extend([k for k in extra if isinstance(k, str) and k.strip()])
164
- return KeywordsStoppingCriteria(keys, chatbot.tokenizer, input_ids)
165
 
166
- # ----------------- Core Generation -----------------
 
167
 
168
  def generate_response(
169
  message_text: str,
170
  image_input,
171
  *,
172
  max_new_tokens: int = 1800,
173
- min_new_tokens: Optional[int] = None,
174
  temperature: float = 0.20,
175
  top_p: float = 0.95,
176
  repetition_penalty: float = 1.20,
177
  no_repeat_ngram_size: Optional[int] = 6,
178
  conv_mode_override: Optional[str] = None,
179
- det_seed: Optional[int] = None,
180
- no_stop: bool = False,
181
- custom_stop: Optional[List[str]] = None,
 
182
  ):
183
  if not (LLAVA_AVAILABLE and TRANSFORMERS_AVAILABLE):
184
  return {"error": "Required libraries not available (llava/transformers)"}
185
  if not message_text or image_input is None:
186
  return {"error": "Both 'message' and 'image' are required"}
187
 
188
- # Chat session (fresh conv each call, demo-like)
189
  chatbot = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
190
  if conv_mode_override and conv_mode_override in conv_templates:
191
  chatbot.conversation = conv_templates[conv_mode_override].copy()
192
  else:
193
  chatbot.conversation = conv_templates[chatbot.conv_mode].copy()
194
 
195
- # Load image
196
  try:
197
  pil_img = load_image_any(image_input)
198
  except Exception as e:
199
  return {"error": f"Failed to load image: {e}"}
200
 
201
- # Save image to logs (optional)
202
  img_hash, img_path = "NA", None
203
  try:
204
  buf = BytesIO(); pil_img.save(buf, format="JPEG"); raw = buf.getvalue()
@@ -211,17 +216,17 @@ def generate_response(
211
  except Exception as e:
212
  print(f"[log] saving image failed: {e}")
213
 
214
- # To device/dtype
215
  device = next(chatbot.model.parameters()).device
216
  dtype = next(chatbot.model.parameters()).dtype
217
 
218
- # Preprocess image -> tensor (support 3D/4D/5D)
219
  try:
220
  processed = process_images([pil_img], chatbot.image_processor, chatbot.model.config)
221
  if isinstance(processed, torch.Tensor):
222
- if processed.ndim == 3: image_tensor = processed.unsqueeze(0)
223
- elif processed.ndim == 4: image_tensor = processed
224
- elif processed.ndim == 5: # (B,T,C,H,W) -> (B*T,C,H,W)
225
  b,t,c,h,w = processed.shape
226
  image_tensor = processed.reshape(b*t, c, h, w)
227
  else:
@@ -238,33 +243,42 @@ def generate_response(
238
  # Prompt & ids
239
  _, input_ids = _build_prompt_and_ids(chatbot, message_text, device)
240
 
241
- # Stopping criteria
242
- stopping = None if no_stop else _stopping_keywords(chatbot, input_ids, custom_stop)
243
- eos_id = chatbot.tokenizer.eos_token_id
244
- pad_id = chatbot.tokenizer.pad_token_id if chatbot.tokenizer.pad_token_id is not None else (eos_id if eos_id is not None else 0)
245
- eos_for_gen = None if no_stop else eos_id
246
-
247
- # Deterministic sampling (optional)
248
  if det_seed is not None:
249
  try:
250
- det_seed = int(det_seed)
251
- torch.manual_seed(det_seed)
252
- if torch.cuda.is_available():
253
- torch.cuda.manual_seed(det_seed)
254
- torch.cuda.manual_seed_all(det_seed)
255
  except Exception:
256
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- # Streamer (demo-like, avoids manual slicing)
259
  streamer = TextIteratorStreamer(
260
  chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
261
  )
262
 
 
 
 
263
  gen_kwargs = dict(
264
  inputs=input_ids,
265
  images=image_tensor,
266
  streamer=streamer,
267
- do_sample=True,
268
  temperature=float(temperature),
269
  top_p=float(top_p),
270
  repetition_penalty=float(repetition_penalty),
@@ -274,7 +288,7 @@ def generate_response(
274
  eos_token_id=eos_for_gen,
275
  length_penalty=1.0,
276
  early_stopping=False,
277
- stopping_criteria=None if no_stop else ([stopping] if stopping else None),
278
  )
279
 
280
  if no_repeat_ngram_size:
@@ -293,14 +307,24 @@ def generate_response(
293
  except Exception:
294
  pass
295
 
296
- # Generate in a background thread; collect streamed tokens
297
  try:
298
  t = Thread(target=chatbot.model.generate, kwargs=gen_kwargs)
299
  t.start()
300
- chunks = []
301
  for piece in streamer:
302
  chunks.append(piece)
303
  text = "".join(chunks)
 
 
 
 
 
 
 
 
 
 
304
  chatbot.conversation.messages[-1][-1] = text
305
  except Exception as e:
306
  return {"error": f"Generation failed: {e}"}
@@ -323,7 +347,8 @@ def generate_response(
323
 
324
  return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
325
 
326
- # ----------------- Public API -----------------
 
327
 
328
  def query(payload: dict):
329
  """HF Endpoint entry (demo-like streaming)"""
@@ -339,12 +364,13 @@ def query(payload: dict):
339
  if not message.strip(): return {"error": "Missing 'message' text"}
340
  if image is None: return {"error": "Missing 'image'. Use 'image', 'image_url', or 'img'."}
341
 
342
- # Demo-like knobs
343
  max_new_tokens = int(payload.get("max_output_tokens", payload.get("max_new_tokens", payload.get("max_tokens", 1800))))
344
- min_new_tokens = payload.get("min_new_tokens", None)
345
- if min_new_tokens is not None:
346
- try: min_new_tokens = int(min_new_tokens)
347
- except Exception: min_new_tokens = None
 
348
 
349
  temperature = float(payload.get("temperature", 0.20))
350
  top_p = float(payload.get("top_p", 0.95))
@@ -356,12 +382,14 @@ def query(payload: dict):
356
  no_repeat_ngram = None
357
 
358
  conv_mode_override = payload.get("conv_mode", None)
 
359
  det_seed = payload.get("det_seed", None)
360
  if det_seed is not None:
361
  try: det_seed = int(det_seed)
362
  except Exception: det_seed = None
363
- no_stop = bool(payload.get("no_stop", False))
364
  custom_stop = payload.get("custom_stop", None)
 
365
 
366
  return generate_response(
367
  message_text=message,
@@ -373,9 +401,10 @@ def query(payload: dict):
373
  repetition_penalty=repetition_penalty,
374
  no_repeat_ngram_size=no_repeat_ngram,
375
  conv_mode_override=conv_mode_override,
 
376
  det_seed=det_seed,
377
- no_stop=no_stop,
378
  custom_stop=custom_stop,
 
379
  )
380
  except Exception as e:
381
  return {"error": f"Query failed: {e}"}
@@ -398,7 +427,8 @@ def get_model_info():
398
  "device": str(next(model.parameters()).device) if model else "Unknown",
399
  }
400
 
401
- # ----------------- Init & Session -----------------
 
402
 
403
  class _Args:
404
  def __init__(self):
@@ -409,6 +439,7 @@ class _Args:
409
  self.max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", "1800"))
410
  self.num_frames = 16
411
  self.load_8bit = bool(int(os.getenv("LOAD_8BIT", "0")))
 
412
  self.load_4bit = bool(int(os.getenv("LOAD_4BIT", "0")))
413
  self.debug = bool(int(os.getenv("DEBUG", "0")))
414
 
@@ -450,23 +481,32 @@ def initialize_model():
450
  try:
451
  args = _Args()
452
  model_name = get_model_name_from_path(args.model_path)
453
- tokenizer, model, image_processor, context_len = load_pretrained_model(
454
  args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
455
  )
 
456
  try:
457
- _ = next(model.parameters()).device
458
  except Exception:
459
  if torch.cuda.is_available():
460
- model = model.to(torch.device("cuda"))
461
- model.eval()
462
- chat_manager.init_if_needed(args, args.model_path, tokenizer, model, image_processor, context_len)
 
 
 
 
 
 
 
463
  print("[init] model/tokenizer/image_processor loaded.")
464
  return True
465
  except Exception as e:
466
  print(f"[init] failed: {e}")
467
  return False
468
 
469
- # ----------------- HF EndpointHandler -----------------
 
470
 
471
  class EndpointHandler:
472
  """Hugging Face Endpoint uyumlu sınıf"""
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ PULSE ECG Handler (demo-like streaming, stable & clean)
4
+ - TextIteratorStreamer + skip_prompt=True baş kesilmesi yok (Step 1 korunur)
5
+ - do_sample=True (demo davranışı), temperature/top_p payloaddan
6
+ - Anti-tekrar: no_repeat_ngram_size + repetition_penalty
7
+ - Opsiyonel: custom_stop (örn. "END OF REPORT") çıktı sonunda trim
8
+ - Deterministik mod: deterministic=True → do_sample=False (greedy); det_seed verilmezse seed her zaman görüntü hash'i + mesajdan türetilir
9
+ - Görsel tensörü 3D/4D/5D uyumlu; device/dtype eşleştirme
10
  """
11
 
12
  import os
 
16
  import datetime
17
  from io import BytesIO
18
  from threading import Thread
19
+ from typing import Optional, List, Union
20
 
21
  import torch
22
  from PIL import Image
23
  import requests
24
 
25
+ # ---------- LLaVA & Transformers ----------
26
  try:
27
  from llava.constants import (
28
  IMAGE_TOKEN_INDEX,
 
36
  tokenizer_image_token,
37
  process_images,
38
  get_model_name_from_path,
 
39
  )
40
  from llava.utils import disable_torch_init
41
  LLAVA_AVAILABLE = True
 
50
  TRANSFORMERS_AVAILABLE = False
51
  print(f"[WARN] transformers not available: {e}")
52
 
53
+ # ---------- HF Hub (opsiyonel logging) ----------
54
  try:
55
  from huggingface_hub import HfApi, login
56
  HF_HUB_AVAILABLE = True
 
72
  LOGDIR = "./logs"
73
  os.makedirs(LOGDIR, exist_ok=True)
74
 
75
+ # ---------- Global Model State ----------
76
  tokenizer = None
77
  model = None
78
  image_processor = None
 
80
  args = None
81
  model_initialized = False
82
 
83
+
84
+ # ======================== Utilities ========================
85
 
86
  def _safe_upload(path: str):
87
  if api and repo_name and path and os.path.isfile(path):
 
95
  except Exception as e:
96
  print(f"[upload] failed for {path}: {e}")
97
 
98
def _conv_log_path() -> str:
    """Return the path of today's conversation log file under ``LOGDIR``.

    The file name is ``YYYY-MM-DD-user_conv.json``, built from the current
    local date (``datetime.datetime.now()``). The parent directory is created
    if it is missing; the log file itself is not created here.
    """
    t = datetime.datetime.now()
    log_path = os.path.join(LOGDIR, f"{t.year:04d}-{t.month:02d}-{t.day:02d}-user_conv.json")
    # Make sure the directory exists so callers can open the file right away.
    parent_dir = os.path.dirname(log_path)
    os.makedirs(parent_dir, exist_ok=True)
    return log_path
103
 
104
+ def load_image_any(image_input: Union[str, dict]) -> Image.Image:
105
  """
106
  Desteklenen:
107
  - URL (http/https)
108
  - Yerel dosya yolu
109
  - base64 (opsiyonel data URL prefix ile)
110
+ - {"image": <base64|dataurl>}
111
  """
112
  if isinstance(image_input, str):
113
  s = image_input.strip()
 
117
  return Image.open(BytesIO(r.content)).convert("RGB")
118
  if os.path.exists(s):
119
  return Image.open(s).convert("RGB")
120
+ # base64 (dataurl olabilir)
121
  if s.startswith("data:image"):
122
  s = s.split(",", 1)[1]
123
  raw = base64.b64decode(s)
124
  return Image.open(BytesIO(raw)).convert("RGB")
125
+
126
+ if isinstance(image_input, dict) and "image" in image_input:
127
  return load_image_any(image_input["image"])
128
+
129
+ raise ValueError("Unsupported image input format")
130
 
131
  def _guess_conv_mode(model_path: str) -> str:
132
  name = get_model_name_from_path(model_path).lower()
 
143
  return False
144
 
145
  def _build_prompt_and_ids(chatbot, user_text: str, device: torch.device):
146
+ # Demo gibi: <image> token + text (IM_START/END gerekiyorsa sar)
147
  use_wrap = _wrap_image_token_if_needed(chatbot.model.config)
148
  if use_wrap:
149
  # <im_start><image><im_end>\n + user text
 
160
  ).unsqueeze(0).to(device)
161
  return prompt, input_ids
162
 
163
def _stable_seed_from(image_hash: str, message_text: str) -> int:
    """Derive a repeatable seed from an image hash and a message.

    The two strings are joined with ``"||"``, UTF-8 encoded and hashed with
    MD5. The first four digest bytes are read as a big-endian unsigned
    integer, so the result lies in ``[0, 2**32 - 1]`` and identical inputs
    always produce the same seed.
    """
    seed_material = (image_hash + "||" + message_text).encode("utf-8")
    digest = hashlib.md5(seed_material).digest()
    # Only the leading 4 bytes are kept: a non-negative 32-bit value.
    return int.from_bytes(digest[:4], "big", signed=False)
 
 
168
 
169
+
170
+ # ======================== Core Generation ========================
171
 
172
  def generate_response(
173
  message_text: str,
174
  image_input,
175
  *,
176
  max_new_tokens: int = 1800,
177
+ min_new_tokens: Optional[int] = 700,
178
  temperature: float = 0.20,
179
  top_p: float = 0.95,
180
  repetition_penalty: float = 1.20,
181
  no_repeat_ngram_size: Optional[int] = 6,
182
  conv_mode_override: Optional[str] = None,
183
+ deterministic: bool = False, # True → do_sample=False (tam deterministik)
184
+ det_seed: Optional[int] = None, # verilirse sabit seed
185
+ custom_stop: Optional[List[str]] = None, # ["END OF REPORT"] gibi
186
+ no_stop: bool = False, # True → eos/stop yok (önerilmez)
187
  ):
188
  if not (LLAVA_AVAILABLE and TRANSFORMERS_AVAILABLE):
189
  return {"error": "Required libraries not available (llava/transformers)"}
190
  if not message_text or image_input is None:
191
  return {"error": "Both 'message' and 'image' are required"}
192
 
193
+ # Chat oturumu (her çağrıda taze template; demo benzeri)
194
  chatbot = chat_manager.get_chatbot(args, args.model_path, tokenizer, model, image_processor, context_len)
195
  if conv_mode_override and conv_mode_override in conv_templates:
196
  chatbot.conversation = conv_templates[conv_mode_override].copy()
197
  else:
198
  chatbot.conversation = conv_templates[chatbot.conv_mode].copy()
199
 
200
+ # Görseli yükle
201
  try:
202
  pil_img = load_image_any(image_input)
203
  except Exception as e:
204
  return {"error": f"Failed to load image: {e}"}
205
 
206
+ # Log için kaydet (hash + path)
207
  img_hash, img_path = "NA", None
208
  try:
209
  buf = BytesIO(); pil_img.save(buf, format="JPEG"); raw = buf.getvalue()
 
216
  except Exception as e:
217
  print(f"[log] saving image failed: {e}")
218
 
219
+ # Cihaza/dtype’a taşı
220
  device = next(chatbot.model.parameters()).device
221
  dtype = next(chatbot.model.parameters()).dtype
222
 
223
+ # Görüntü ön-işleme tensör (3D/4D/5D destek)
224
  try:
225
  processed = process_images([pil_img], chatbot.image_processor, chatbot.model.config)
226
  if isinstance(processed, torch.Tensor):
227
+ if processed.ndim == 3: image_tensor = processed.unsqueeze(0) # (1,C,H,W)
228
+ elif processed.ndim == 4: image_tensor = processed # (B,C,H,W)
229
+ elif processed.ndim == 5: # (B,T,C,H,W) (B*T,C,H,W)
230
  b,t,c,h,w = processed.shape
231
  image_tensor = processed.reshape(b*t, c, h, w)
232
  else:
 
243
  # Prompt & ids
244
  _, input_ids = _build_prompt_and_ids(chatbot, message_text, device)
245
 
246
+ # Seed ayarı
 
 
 
 
 
 
247
  if det_seed is not None:
248
  try:
249
+ s = int(det_seed)
 
 
 
 
250
  except Exception:
251
+ s = None
252
+ elif deterministic:
253
+ s = _stable_seed_from(img_hash, message_text)
254
+ else:
255
+ # Deterministik örnekleme istiyorsan; aynı girdide aynı sonuç için stabil seed de kullanabiliriz
256
+ s = _stable_seed_from(img_hash, message_text)
257
+
258
+ if s is not None:
259
+ torch.manual_seed(s)
260
+ if torch.cuda.is_available():
261
+ torch.cuda.manual_seed(s)
262
+ torch.cuda.manual_seed_all(s)
263
+
264
+ # Stopping / EOS
265
+ eos_id = chatbot.tokenizer.eos_token_id
266
+ pad_id = chatbot.tokenizer.pad_token_id if chatbot.tokenizer.pad_token_id is not None else (eos_id if eos_id is not None else 0)
267
+ eos_for_gen = None if no_stop else eos_id
268
 
269
+ # Streamer (demo gibi; manuel dilimleme yok → Step 1 korunur)
270
  streamer = TextIteratorStreamer(
271
  chatbot.tokenizer, skip_prompt=True, skip_special_tokens=True
272
  )
273
 
274
+ # do_sample: demo gibi (True). deterministic=True ise greedy’ye geç
275
+ do_sample = not deterministic
276
+
277
  gen_kwargs = dict(
278
  inputs=input_ids,
279
  images=image_tensor,
280
  streamer=streamer,
281
+ do_sample=do_sample,
282
  temperature=float(temperature),
283
  top_p=float(top_p),
284
  repetition_penalty=float(repetition_penalty),
 
288
  eos_token_id=eos_for_gen,
289
  length_penalty=1.0,
290
  early_stopping=False,
291
+ # stopping_criteria vermiyoruz LLaVA'daki KeywordsStoppingCriteria hatalarından kaçınmak için
292
  )
293
 
294
  if no_repeat_ngram_size:
 
307
  except Exception:
308
  pass
309
 
310
+ # Üretim (arka thread) + stream toplama
311
  try:
312
  t = Thread(target=chatbot.model.generate, kwargs=gen_kwargs)
313
  t.start()
314
+ chunks: List[str] = []
315
  for piece in streamer:
316
  chunks.append(piece)
317
  text = "".join(chunks)
318
+ # custom_stop varsa çıktıdan itibaren kırp
319
+ if custom_stop:
320
+ if isinstance(custom_stop, str):
321
+ custom_stop = [custom_stop]
322
+ for tag in custom_stop:
323
+ if isinstance(tag, str) and tag:
324
+ idx = text.find(tag)
325
+ if idx != -1:
326
+ text = text[:idx].rstrip()
327
+ break
328
  chatbot.conversation.messages[-1][-1] = text
329
  except Exception as e:
330
  return {"error": f"Generation failed: {e}"}
 
347
 
348
  return {"status": "success", "response": text, "conversation_id": id(chatbot.conversation)}
349
 
350
+
351
+ # ======================== Public API ========================
352
 
353
  def query(payload: dict):
354
  """HF Endpoint entry (demo-like streaming)"""
 
364
  if not message.strip(): return {"error": "Missing 'message' text"}
365
  if image is None: return {"error": "Missing 'image'. Use 'image', 'image_url', or 'img'."}
366
 
367
+ # Demo-like varsayılanlar
368
  max_new_tokens = int(payload.get("max_output_tokens", payload.get("max_new_tokens", payload.get("max_tokens", 1800))))
369
+ min_new_tokens = payload.get("min_new_tokens", 700)
370
+ try:
371
+ min_new_tokens = int(min_new_tokens) if min_new_tokens is not None else None
372
+ except Exception:
373
+ min_new_tokens = None
374
 
375
  temperature = float(payload.get("temperature", 0.20))
376
  top_p = float(payload.get("top_p", 0.95))
 
382
  no_repeat_ngram = None
383
 
384
  conv_mode_override = payload.get("conv_mode", None)
385
+ deterministic = bool(payload.get("deterministic", False))
386
  det_seed = payload.get("det_seed", None)
387
  if det_seed is not None:
388
  try: det_seed = int(det_seed)
389
  except Exception: det_seed = None
390
+
391
  custom_stop = payload.get("custom_stop", None)
392
+ no_stop = bool(payload.get("no_stop", False)) # genelde False kalsın
393
 
394
  return generate_response(
395
  message_text=message,
 
401
  repetition_penalty=repetition_penalty,
402
  no_repeat_ngram_size=no_repeat_ngram,
403
  conv_mode_override=conv_mode_override,
404
+ deterministic=deterministic,
405
  det_seed=det_seed,
 
406
  custom_stop=custom_stop,
407
+ no_stop=no_stop,
408
  )
409
  except Exception as e:
410
  return {"error": f"Query failed: {e}"}
 
427
  "device": str(next(model.parameters()).device) if model else "Unknown",
428
  }
429
 
430
+
431
+ # ======================== Init & Session ========================
432
 
433
  class _Args:
434
  def __init__(self):
 
439
  self.max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", "1800"))
440
  self.num_frames = 16
441
  self.load_8bit = bool(int(os.getenv("LOAD_8BIT", "0")))
442
+ # 4bit/8bit hız için açık bırakılabilir; accelerate devicemap kullanıyorsanız .to(cuda) gerekmez
443
  self.load_4bit = bool(int(os.getenv("LOAD_4BIT", "0")))
444
  self.debug = bool(int(os.getenv("DEBUG", "0")))
445
 
 
481
  try:
482
  args = _Args()
483
  model_name = get_model_name_from_path(args.model_path)
484
+ tokenizer_, model_, image_processor_, context_len_ = load_pretrained_model(
485
  args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
486
  )
487
+ # Device
488
  try:
489
+ _ = next(model_.parameters()).device
490
  except Exception:
491
  if torch.cuda.is_available():
492
+ model_ = model_.to(torch.device("cuda"))
493
+ model_.eval()
494
+
495
+ # assign globals
496
+ globals()["tokenizer"] = tokenizer_
497
+ globals()["model"] = model_
498
+ globals()["image_processor"] = image_processor_
499
+ globals()["context_len"] = context_len_
500
+
501
+ chat_manager.init_if_needed(args, args.model_path, tokenizer_, model_, image_processor_, context_len_)
502
  print("[init] model/tokenizer/image_processor loaded.")
503
  return True
504
  except Exception as e:
505
  print(f"[init] failed: {e}")
506
  return False
507
 
508
+
509
+ # ======================== HF EndpointHandler ========================
510
 
511
  class EndpointHandler:
512
  """Hugging Face Endpoint uyumlu sınıf"""