Files changed (1)
  1. handler.py +487 -313
handler.py CHANGED
@@ -1,341 +1,515 @@
- # -*- coding: utf-8 -*-
- # handler.py — Rapid_ECG / PULSE-7B — startup-load, stable version with DEBUG logging
- # - The model is loaded as soon as the server starts (cold start happens only once)
- # - Follows the HF Endpoint contract (EndpointHandler.load().__call__)
- # - Load order: local (HF_MODEL_DIR) → Hub (HF_MODEL_ID)
- # - Images are processed only via .preprocess() (no process_images)
- # - Vision tower check: mm_vision_tower or vision_tower
- # - Uses IMAGE_TOKEN_INDEX and comprehensive [DEBUG] logging
-
  import os
- import io
- import sys
- import base64
- import subprocess
- from typing import Any, Dict, Optional
-
  import torch
- from PIL import Image
  import requests

-
-
- import os
- os.environ.setdefault("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION", "python")
-
-
- # ===== Ensure the LLaVA library is available =====
- def _ensure_llava(tag: str = "v1.2.0"):
-     try:
-         import llava  # noqa
-         print("[DEBUG] LLaVA already available.")
-         return
-     except ImportError:
-         print(f"[DEBUG] LLaVA not found; installing (tag={tag}) ...")
-         subprocess.check_call([
-             sys.executable, "-m", "pip", "install",
-             f"git+https://github.com/haotian-liu/LLaVA@{tag}#egg=llava"
-         ])
-         print("[DEBUG] LLaVA installed.")
-
- _ensure_llava("v1.2.0")
-
- # ===== LLaVA imports =====
- from llava.conversation import conv_templates
  from llava.constants import (
      DEFAULT_IMAGE_TOKEN,
      DEFAULT_IM_START_TOKEN,
      DEFAULT_IM_END_TOKEN,
-     IMAGE_TOKEN_INDEX,
  )
  from llava.model.builder import load_pretrained_model
- from llava.mm_utils import tokenizer_image_token, get_model_name_from_path
-
-
- # ---------- helpers ----------
- def _get_env(name: str, default: Optional[str] = None) -> Optional[str]:
-     v = os.getenv(name)
-     return v if v not in (None, "") else default
-
- def _pick_device() -> torch.device:
-     if torch.cuda.is_available():
-         dev = torch.device("cuda")
-     elif torch.backends.mps.is_available():
-         dev = torch.device("mps")
-     else:
-         dev = torch.device("cpu")
-     print(f"[DEBUG] pick_device -> {dev}")
-     return dev
-
- def _pick_dtype(device: torch.device):
-     if device.type == "cuda":
-         dt = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
-     else:
-         dt = torch.float32
-     print(f"[DEBUG] pick_dtype({device}) -> {dt}")
-     return dt

- def _is_probably_base64(s: str) -> bool:
-     s = s.strip()
-     if s.startswith("data:image"):
-         return True
-     allowed = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r")
-     return len(s) % 4 == 0 and all(c in allowed for c in s)
-
- def _load_image_from_any(image_input: Any) -> Image.Image:
-     print(f"[DEBUG] _load_image_from_any type={type(image_input)}")
-     if isinstance(image_input, Image.Image):
-         return image_input.convert("RGB")
-     if isinstance(image_input, (bytes, bytearray)):
-         return Image.open(io.BytesIO(image_input)).convert("RGB")
-     if hasattr(image_input, "read"):
-         return Image.open(image_input).convert("RGB")
-     if isinstance(image_input, str):
-         s = image_input.strip()
-         if s.startswith("data:image"):
-             try:
-                 _, b64 = s.split(",", 1)
-                 data = base64.b64decode(b64)
-                 return Image.open(io.BytesIO(data)).convert("RGB")
-             except Exception as e:
-                 raise ValueError(f"Bad data URL: {e}")
-         if _is_probably_base64(s) and not s.startswith(("http://", "https://")):
-             try:
-                 data = base64.b64decode(s)
-                 return Image.open(io.BytesIO(data)).convert("RGB")
-             except Exception as e:
-                 raise ValueError(f"Bad base64 image: {e}")
-         if s.startswith(("http://", "https://")):
-             resp = requests.get(s, timeout=20)
-             resp.raise_for_status()
-             return Image.open(io.BytesIO(resp.content)).convert("RGB")
-         # local path
-         return Image.open(s).convert("RGB")
-     raise ValueError(f"Unsupported image input type: {type(image_input)}")
-
- def _get_conv_mode(model_name: str) -> str:
-     name = (model_name or "").lower()
-     if "llama-2" in name:
-         return "llava_llama_2"
-     if "mistral" in name:
-         return "mistral_instruct"
-     if "v1.6-34b" in name:
-         return "chatml_direct"
-     if "v1" in name or "pulse" in name:
-         return "llava_v1"
-     if "mpt" in name:
-         return "mpt"
-     return "llava_v0"
-
- def _build_prompt_with_image(prompt: str, model_cfg) -> str:
-     # If the user already added an image token, do not add it again
-     if DEFAULT_IMAGE_TOKEN in prompt or DEFAULT_IM_START_TOKEN in prompt:
-         return prompt
-     if getattr(model_cfg, "mm_use_im_start_end", False):
-         token = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
-         return f"{token}\n{prompt}"
-     return f"{DEFAULT_IMAGE_TOKEN}\n{prompt}"
-
- def _resolve_model_path(model_dir_hint: Optional[str], default_dir: str = "/repository") -> str:
-     # Priority: HF_MODEL_DIR (local) -> model_dir_hint from the ctor -> default_dir
-     p = _get_env("HF_MODEL_DIR") or model_dir_hint or default_dir
-     p = os.path.abspath(p)
-     print(f"[DEBUG] resolved model path: {p}")
-     return p
-
-
- # ---------- Endpoint Handler ----------
- class EndpointHandler:
-     def __init__(self, model_dir: Optional[str] = None):
-         # DEBUG banner
-         print("🚀 Starting up PULSE-7B handler (startup load)...")
-         print("📝 Enhanced by Ubden® Team")
-         print(f"🔧 Python: {sys.version}")
-         print(f"🔧 PyTorch: {torch.__version__}")
          try:
-             import transformers
-             print(f"🔧 Transformers: {transformers.__version__}")
          except Exception as e:
-             print(f"[DEBUG] transformers import failed: {e}")
-
-         self.model_dir = model_dir
-         self.device = _pick_device()
-         self.dtype = _pick_dtype(self.device)

-         # Environment hints (flash attention; harmless if unsupported)
-         os.environ.setdefault("ATTN_IMPLEMENTATION", "flash_attention_2")
-         os.environ.setdefault("FLASH_ATTENTION", "1")
-         print(f"[DEBUG] ATTN_IMPLEMENTATION={os.getenv('ATTN_IMPLEMENTATION')} FLASH_ATTENTION={os.getenv('FLASH_ATTENTION')}")

-         # Containers for the model/tokenizer/image processor
-         self.model = None
-         self.tokenizer = None
-         self.image_processor = None
-         self.context_len = None
-         self.model_name = None

-         # ---- Load the model here (at startup) ----
-         try:
-             self._startup_load_model()
-             print("✅ Model loaded & ready in __init__")
-         except Exception as e:
-             print(f"💥 CRITICAL: model startup load failed: {e}")
-             raise
-
-     def _startup_load_model(self):
-         # Use the local directory if present, otherwise the Hub
-         local_path = _resolve_model_path(self.model_dir)
-         use_local = os.path.isdir(local_path) and any(
-             os.path.exists(os.path.join(local_path, f))
-             for f in ("config.json", "tokenizer_config.json")
          )
-         model_base = _get_env("HF_MODEL_BASE", None)
-
-         if use_local:
-             model_path = local_path
-             print(f"[DEBUG] loading model LOCALLY from: {model_path}")
-         else:
-             model_path = _get_env("HF_MODEL_ID", "PULSE-ECG/PULSE-7B")
-             print(f"[DEBUG] loading model from HUB: {model_path} (HF_MODEL_BASE={model_base})")

-         # ⬇️ FIX: the LLaVA v1.2.0 signature requires a model_name parameter
          model_name = get_model_name_from_path(model_path)
-         print(f"[DEBUG] resolved model_name: {model_name}")
-
-         print("[DEBUG] calling load_pretrained_model ...")
-         self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
-             model_path=model_path,
-             model_base=model_base,
-             model_name=model_name,  # <-- required parameter
-             load_8bit=False,
-             load_4bit=False,
-             device_map="auto",
-             device=self.device,
-         )
-         self.model_name = getattr(self.model.config, "name_or_path", str(model_path))
-         print(f"[DEBUG] model loaded: name={self.model_name}")

-         # Vision tower check (new/old field names)
-         vt = (
-             getattr(self.model.config, "mm_vision_tower", None)
-             or getattr(self.model.config, "vision_tower", None)
-         )
-         print(f"[DEBUG] vision tower: {vt}")
-         if self.image_processor is None or vt is None:
-             raise RuntimeError(
-                 "[ERROR] Vision tower not loaded (mm_vision_tower/vision_tower None). "
-                 "For local loading, HF_MODEL_DIR must point to the correct folder; "
-                 "for the Hub, HF_MODEL_ID must be a PULSE/LLaVA-based model (e.g. 'PULSE-ECG/PULSE-7B')."
            )
-
-         # Tokenizer safety
-         try:
-             self.tokenizer.padding_side = "left"
-             if getattr(self.tokenizer, "pad_token_id", None) is None:
-                 self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
-         except Exception as e:
-             print(f"[DEBUG] tokenizer safety patch failed: {e}")
-
-         self.model.eval()
-
-     # The HF inference toolkit calls load() anyway, so this is a no-op
-     def load(self):
-         print("[DEBUG] load(): model is already initialized in __init__")
-         return True
-
-     @torch.inference_mode()
-     def __call__(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
-         print(f"[DEBUG] __call__ inputs keys={list(inputs.keys()) if hasattr(inputs,'keys') else 'N/A'}")
-         # Unwrap the HF {"inputs": {...}} envelope
-         if "inputs" in inputs and isinstance(inputs["inputs"], dict):
-             inputs = inputs["inputs"]
-
-         prompt = inputs.get("query") or inputs.get("prompt") or inputs.get("istem") or ""
-         image_in = inputs.get("image") or inputs.get("image_url") or inputs.get("img")
-         if not image_in:
-             return {"error": "Missing 'image' in payload"}
-         if not isinstance(prompt, str) or not prompt.strip():
-             return {"error": "Missing 'query'/'prompt' text"}
-
-         # Generation parameters
-         temperature = float(inputs.get("temperature", 0.0))
-         top_p = float(inputs.get("top_p", 0.9))
-         max_new = int(inputs.get("max_new_tokens", inputs.get("max_tokens", 512)))
-         repetition_penalty = float(inputs.get("repetition_penalty", 1.0))
-         conv_mode_override = inputs.get("conv_mode") or _get_env("CONV_MODE", None)
-
-         # ---- Load + preprocess the image
-         try:
-             image = _load_image_from_any(image_in)
-             print(f"[DEBUG] loaded image size={image.size}")
-         except Exception as e:
-             return {"error": f"Failed to load image: {e}"}
-
-         if self.image_processor is None:
-             return {"error": "image_processor is None; model not initialized properly (no vision tower)"}
-
-         try:
-             out = self.image_processor.preprocess(image, return_tensors="pt")
-             images_tensor = out["pixel_values"].to(self.device, dtype=self.dtype)
-             image_sizes = [image.size]
-             print(f"[DEBUG] preprocess OK; images_tensor.shape={images_tensor.shape}")
-         except Exception as e:
-             return {"error": f"Image preprocessing failed: {e}"}
-
-         # ---- Conversation + prompt
-         mode = conv_mode_override or _get_conv_mode(self.model_name)
-         conv = (conv_templates.get(mode) or conv_templates[list(conv_templates.keys())[0]]).copy()
-         conv.append_message(conv.roles[0], _build_prompt_with_image(prompt.strip(), self.model.config))
-         conv.append_message(conv.roles[1], None)
-         full_prompt = conv.get_prompt()
-         print(f"[DEBUG] conv_mode={mode}; full_prompt_len={len(full_prompt)}")
-
-         # ---- Tokenization (with IMAGE_TOKEN_INDEX)
          try:
-             input_ids = tokenizer_image_token(
-                 full_prompt, self.tokenizer, image_token_index=IMAGE_TOKEN_INDEX, return_tensors="pt"
-             ).unsqueeze(0).to(self.device)
-             print(f"[DEBUG] tokenizer_image_token OK; input_ids.shape={input_ids.shape}")
          except Exception as e:
-             print(f"[DEBUG] tokenizer_image_token failed: {e}; fallback to plain tokenizer")
-             try:
-                 toks = self.tokenizer([full_prompt], return_tensors="pt", padding=True, truncation=True)
-                 input_ids = toks["input_ids"].to(self.device)
-                 print(f"[DEBUG] plain tokenizer OK; input_ids.shape={input_ids.shape}")
-             except Exception as e2:
-                 return {"error": f"Tokenization failed: {e} / {e2}"}
-
-         attention_mask = torch.ones_like(input_ids, device=self.device)
-
-         # ---- Generate
-         try:
-             print(f"[DEBUG] generate(max_new_tokens={max_new}, temp={temperature}, top_p={top_p}, rep={repetition_penalty})")
-             gen_ids = self.model.generate(
-                 input_ids=input_ids,
-                 attention_mask=attention_mask,
-                 images=images_tensor,
-                 image_sizes=image_sizes,
-                 do_sample=(temperature > 0),
                  temperature=temperature,
                  top_p=top_p,
-                 max_new_tokens=max_new,
-                 repetition_penalty=repetition_penalty,
-                 use_cache=True,
              )
-             print(f"[DEBUG] generate OK; gen_ids.shape={gen_ids.shape}")
-         except Exception as e:
-             return {"error": f"Generation failed: {e}"}
-
-         # ---- Decode (new tokens only)
-         try:
-             new_tokens = gen_ids[0, input_ids.shape[1]:]
-             text = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
-             print(f"[DEBUG] decoded_text_len={len(text)}")
-         except Exception as e:
-             return {"error": f"Decode failed: {e}"}
-
          return {
-             "generated_text": text,
-             "model": self.model_name,
-             "conv_mode": mode,
          }

  import os
+ import cv2
+ import datetime
  import torch
+ import numpy as np
+ import hashlib
+ import PIL
+ import base64
+ import json
  import requests
+ from PIL import Image
+ from io import BytesIO
+ from transformers import TextStreamer, TextIteratorStreamer
+ from threading import Thread

+ from llava import conversation as conversation_lib
+ from llava.constants import DEFAULT_IMAGE_TOKEN
  from llava.constants import (
+     IMAGE_TOKEN_INDEX,
      DEFAULT_IMAGE_TOKEN,
      DEFAULT_IM_START_TOKEN,
      DEFAULT_IM_END_TOKEN,
  )
+ from llava.conversation import conv_templates, SeparatorStyle
  from llava.model.builder import load_pretrained_model
+ from llava.utils import disable_torch_init
+ from llava.mm_utils import (
+     tokenizer_image_token,
+     process_images,
+     get_model_name_from_path,
+     KeywordsStoppingCriteria,
+ )

+ import spaces
+ from huggingface_hub import HfApi
+ from huggingface_hub import login
+ from huggingface_hub import revision_exists
+
+ # Initialize Hugging Face API
+ if "HF_TOKEN" in os.environ:
+     login(token=os.environ["HF_TOKEN"], write_permission=True)
+     api = HfApi()
+     repo_name = os.environ.get("LOG_REPO", "")
+ else:
+     api = None
+     repo_name = ""
+
+ external_log_dir = "./logs"
+ LOGDIR = external_log_dir
+ VOTEDIR = "./votes"
+
+ # Global variables for model and tokenizer
+ tokenizer = None
+ model = None
+ image_processor = None
+ context_len = None
+ args = None
+ model_path = None  # set by initialize_model()
+
+ # Gradio is no longer used - not needed for a Hugging Face endpoint
+
+ def get_conv_log_filename():
+     t = datetime.datetime.now()
+     name = os.path.join(LOGDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-user_conv.json")
+     return name
+
+ def get_conv_vote_filename():
+     t = datetime.datetime.now()
+     name = os.path.join(VOTEDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-user_vote.json")
+     if not os.path.isfile(name):
+         os.makedirs(os.path.dirname(name), exist_ok=True)
+     return name
+
+ def vote_last_response(state, vote_type, model_selector):
+     if api and repo_name:
+         with open(get_conv_vote_filename(), "a") as fout:
+             data = {
+                 "type": vote_type,
+                 "model": model_selector,
+                 "state": state,
+             }
+             fout.write(json.dumps(data) + "\n")
          try:
+             api.upload_file(
+                 path_or_fileobj=get_conv_vote_filename(),
+                 path_in_repo=get_conv_vote_filename().replace("./votes/", ""),
+                 repo_id=repo_name,
+                 repo_type="dataset")
          except Exception as e:
+             print(f"Failed to upload vote file: {e}")
+
+ def is_valid_video_filename(name):
+     video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"]
+     ext = name.split(".")[-1].lower()
+     return ext in video_extensions
+
+ def is_valid_image_filename(name):
+     image_extensions = ["jpg", "jpeg", "png", "bmp", "gif", "tiff", "webp", "heic", "heif", "jfif", "svg", "eps", "raw"]
+     ext = name.split(".")[-1].lower()
+     return ext in image_extensions
+
+ def sample_frames(video_file, num_frames):
+     video = cv2.VideoCapture(video_file)
+     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+     # Guard against clips shorter than num_frames, which would make the interval 0
+     interval = max(total_frames // num_frames, 1)
+     frames = []
+     for i in range(total_frames):
+         ret, frame = video.read()
+         if not ret:
+             continue
+         if i % interval == 0:
+             pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+             frames.append(pil_img)
+     video.release()
+     return frames
+
+ def load_image(image_file):
+     if image_file.startswith("http") or image_file.startswith("https"):
+         response = requests.get(image_file)
+         if response.status_code == 200:
+             image = Image.open(BytesIO(response.content)).convert("RGB")
+         else:
+             raise ValueError("Failed to load image from URL")
+     else:
+         print("Load image from local file")
+         print(image_file)
+         image = Image.open(image_file).convert("RGB")
+     return image

+ def process_base64_image(base64_string):
+     """Process base64 encoded image string"""
+     try:
+         # Remove data URL prefix if present
+         if base64_string.startswith('data:image'):
+             base64_string = base64_string.split(',')[1]
+
+         # Decode base64 to bytes
+         image_data = base64.b64decode(base64_string)
+
+         # Convert to PIL Image
+         image = Image.open(BytesIO(image_data)).convert("RGB")
+         return image
+     except Exception as e:
+         raise ValueError(f"Failed to process base64 image: {e}")
+
+ def process_image_input(image_input):
+     """Process different types of image input (file path, URL, or base64)"""
+     if isinstance(image_input, str):
+         if image_input.startswith("http"):
+             return load_image(image_input)
+         elif os.path.exists(image_input):
+             return load_image(image_input)
+         else:
+             # Try to process as base64
+             return process_base64_image(image_input)
+     elif isinstance(image_input, dict) and "image" in image_input:
+         # Handle base64 image from dict
+         return process_base64_image(image_input["image"])
+     else:
+         raise ValueError("Unsupported image input format")

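Note: `process_image_input` accepts a URL, an existing local path, or a base64 string (raw or `data:image/...` URL), so all of the payload styles below resolve to the same RGB `PIL.Image`. A minimal sketch; the file path and URL are placeholders, not files from this repo:

```python
import base64

# All three input styles are routed by process_image_input:
img_a = process_image_input("https://example.com/ecg.png")    # URL, fetched via requests
img_b = process_image_input("/data/samples/ecg_12lead.jpg")   # existing local file

with open("/data/samples/ecg_12lead.jpg", "rb") as f:
    img_c = process_image_input(base64.b64encode(f.read()).decode("ascii"))  # raw base64
```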
+ class InferenceDemo(object):
+     def __init__(self, args, model_path, tokenizer, model, image_processor, context_len) -> None:
+         disable_torch_init()

+         self.tokenizer, self.model, self.image_processor, self.context_len = (
+             tokenizer,
+             model,
+             image_processor,
+             context_len,
          )

          model_name = get_model_name_from_path(model_path)
+         if "llama-2" in model_name.lower():
+             conv_mode = "llava_llama_2"
+         elif "v1" in model_name.lower() or "pulse" in model_name.lower():
+             conv_mode = "llava_v1"
+         elif "mpt" in model_name.lower():
+             conv_mode = "mpt"
+         elif "qwen" in model_name.lower():
+             conv_mode = "qwen_1_5"
+         else:
+             conv_mode = "llava_v0"

+         if args.conv_mode is not None and conv_mode != args.conv_mode:
+             print(
+                 "[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format(
+                     conv_mode, args.conv_mode, args.conv_mode
+                 )
              )
+             conv_mode = args.conv_mode  # honor the explicit override, as the warning states
+         else:
+             args.conv_mode = conv_mode
+         self.conv_mode = conv_mode
+         self.conversation = conv_templates[args.conv_mode].copy()
+         self.num_frames = args.num_frames
+
+ class ChatSessionManager:
+     def __init__(self):
+         self.chatbot_instance = None
+
+     def initialize_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
+         self.chatbot_instance = InferenceDemo(args, model_path, tokenizer, model, image_processor, context_len)
+         print(f"Initialized Chatbot instance with ID: {id(self.chatbot_instance)}")
+
+     def reset_chatbot(self):
+         self.chatbot_instance = None
+
+     def get_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
+         if self.chatbot_instance is None:
+             self.initialize_chatbot(args, model_path, tokenizer, model, image_processor, context_len)
+         return self.chatbot_instance
+
+ chat_manager = ChatSessionManager()
+
+ def clear_history():
+     """Clear conversation history"""
+     chatbot_instance = chat_manager.get_chatbot(args, model_path, tokenizer, model, image_processor, context_len)
+     chatbot_instance.conversation = conv_templates[chatbot_instance.conv_mode].copy()
+     return {"status": "success", "message": "Conversation history cleared"}
+
+ def add_message(message_text, image_input=None):
+     """Add a message to the conversation"""
+     # Track the image count on the function object itself (no module-level global needed)
+     if not hasattr(add_message, 'chat_image_num'):
+         add_message.chat_image_num = 0
+
+     if image_input:
+         add_message.chat_image_num += 1
+         if add_message.chat_image_num > 1:
+             # A new image starts a fresh session
+             chat_manager.reset_chatbot()
+             add_message.chat_image_num = 1
+
+     return {"status": "success", "message": "Message added"}
+
+ @spaces.GPU
+ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, max_output_tokens=4096):
+     """Generate response for the given message and image"""
+     try:
+         if not message_text or not image_input:
+             return {"error": "Both message text and image are required"}
+
+         our_chatbot = chat_manager.get_chatbot(args, model_path, tokenizer, model, image_processor, context_len)
+
+         # Process image input
          try:
+             image = process_image_input(image_input)
          except Exception as e:
+             return {"error": f"Failed to process image: {str(e)}"}
+
+         # Save image for logging
+         all_image_hash = []
+         all_image_path = []
+
+         # Generate hash for the image
+         img_byte_arr = BytesIO()
+         image.save(img_byte_arr, format='JPEG')
+         img_byte_arr = img_byte_arr.getvalue()
+         image_hash = hashlib.md5(img_byte_arr).hexdigest()
+         all_image_hash.append(image_hash)
+
+         # Save image to logs
+         t = datetime.datetime.now()
+         filename = os.path.join(
+             LOGDIR,
+             "serve_images",
+             f"{t.year}-{t.month:02d}-{t.day:02d}",
+             f"{image_hash}.jpg",
+         )
+         all_image_path.append(filename)
+         if not os.path.isfile(filename):
+             os.makedirs(os.path.dirname(filename), exist_ok=True)
+             print("image saved to", filename)
+             image.save(filename)
+
+         # Process image for model
+         image_tensor = process_images([image], our_chatbot.image_processor, our_chatbot.model.config)[0]
+         image_tensor = image_tensor.half().to(our_chatbot.model.device)
+         image_tensor = image_tensor.unsqueeze(0)
+
+         # Prepare conversation
+         inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
+         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
+         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
+         prompt = our_chatbot.conversation.get_prompt()
+
+         # Tokenize input
+         input_ids = tokenizer_image_token(
+             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
+         ).unsqueeze(0).to(our_chatbot.model.device)
+
+         # Set up stopping criteria
+         stop_str = (
+             our_chatbot.conversation.sep
+             if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
+             else our_chatbot.conversation.sep2
+         )
+         keywords = [stop_str]
+         stopping_criteria = KeywordsStoppingCriteria(
+             keywords, our_chatbot.tokenizer, input_ids
+         )
+
+         # Generate response
+         with torch.no_grad():
+             outputs = our_chatbot.model.generate(
+                 inputs=input_ids,
+                 images=image_tensor,
+                 do_sample=True,
                  temperature=temperature,
                  top_p=top_p,
+                 max_new_tokens=max_output_tokens,
+                 use_cache=False,
+                 stopping_criteria=[stopping_criteria],
              )
+
+         # Decode response
+         response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
+         our_chatbot.conversation.messages[-1][-1] = response
+
+         # Log conversation
+         history = [(message_text, response)]
+         with open(get_conv_log_filename(), "a") as fout:
+             data = {
+                 "type": "chat",
+                 "model": "PULSE-7b",
+                 "state": history,
+                 "images": all_image_hash,
+                 "images_path": all_image_path
+             }
+             print("#### conv log", data)
+             fout.write(json.dumps(data) + "\n")
+
+         # Upload files to Hugging Face if configured
+         if api and repo_name:
+             try:
+                 for upload_img in all_image_path:
+                     api.upload_file(
+                         path_or_fileobj=upload_img,
+                         path_in_repo=upload_img.replace("./logs/", ""),
+                         repo_id=repo_name,
+                         repo_type="dataset",
+                     )
+
+                 # Upload conversation log
+                 api.upload_file(
+                     path_or_fileobj=get_conv_log_filename(),
+                     path_in_repo=get_conv_log_filename().replace("./logs/", ""),
+                     repo_id=repo_name,
+                     repo_type="dataset")
+             except Exception as e:
+                 print(f"Failed to upload files: {e}")
+
          return {
+             "status": "success",
+             "response": response,
+             "conversation_id": id(our_chatbot.conversation)
          }
+
+     except Exception as e:
+         return {"error": f"Generation failed: {str(e)}"}
+
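Note: `generate_response` always returns a plain dict, so callers can branch on the `status`/`error` keys, and the `conversation_id` it returns is what the vote endpoints below expect. A minimal sketch; the prompt and image path are placeholders:

```python
result = generate_response(
    message_text="What rhythm does this ECG show?",
    image_input="/data/samples/ecg_12lead.jpg",  # a URL or base64 string also works
    temperature=0.05,
    top_p=1.0,
    max_output_tokens=1024,
)

if "error" in result:
    print("Generation failed:", result["error"])
else:
    print(result["response"])                        # the model's answer
    upvote_last_response(result["conversation_id"])  # optional feedback
```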
+ def upvote_last_response(conversation_id):
+     """Upvote the last response"""
+     try:
+         vote_last_response({"conversation_id": conversation_id}, "upvote", "PULSE-7B")
+         return {"status": "success", "message": "Thank you for voting!"}
+     except Exception as e:
+         return {"error": f"Failed to upvote: {str(e)}"}
+
+ def downvote_last_response(conversation_id):
+     """Downvote the last response"""
+     try:
+         vote_last_response({"conversation_id": conversation_id}, "downvote", "PULSE-7B")
+         return {"status": "success", "message": "Thank you for voting!"}
+     except Exception as e:
+         return {"error": f"Failed to downvote: {str(e)}"}
+
+ def flag_response(conversation_id):
+     """Flag the last response"""
+     try:
+         vote_last_response({"conversation_id": conversation_id}, "flag", "PULSE-7B")
+         return {"status": "success", "message": "Response flagged successfully"}
+     except Exception as e:
+         return {"error": f"Failed to flag response: {str(e)}"}
+
+ # Initialize model when module is imported
+ def initialize_model():
+     """Initialize the model and tokenizer"""
+     global tokenizer, model, image_processor, context_len, args, model_path
+
+     try:
+         # Set default arguments
+         class Args:
+             def __init__(self):
+                 self.model_path = "PULSE-ECG/PULSE-7B"
+                 self.model_base = None
+                 self.num_gpus = 1
+                 self.conv_mode = None
+                 self.temperature = 0.05
+                 self.max_new_tokens = 1024
+                 self.num_frames = 16
+                 self.load_8bit = False
+                 self.load_4bit = False
+                 self.debug = False
+
+         args = Args()
+
+         # Load model (model_path is kept global; generate_response and clear_history read it)
+         model_path = args.model_path
+         model_name = get_model_name_from_path(args.model_path)
+         tokenizer, model, image_processor, context_len = load_pretrained_model(
+             args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
+         )
+
+         print("### image_processor", image_processor)
+         print("### tokenizer", tokenizer)
+
+         # Move model to GPU if available
+         if torch.cuda.is_available():
+             model = model.to(torch.device('cuda'))
+             print("Model moved to CUDA")
+         else:
+             print("CUDA not available, using CPU")
+
+         return True
+
+     except Exception as e:
+         print(f"Failed to initialize model: {e}")
+         return False
+
+ # Initialize model on import
+ model_initialized = initialize_model()
+
+ # Main endpoint function for Hugging Face
+ def query(payload):
+     """Main endpoint function for the Hugging Face inference API"""
+     if not model_initialized:
+         return {"error": "Model not initialized"}
+
+     try:
+         # Extract parameters from payload
+         message_text = payload.get("message", "")
+         image_input = payload.get("image", None)
+         temperature = payload.get("temperature", 0.05)
+         top_p = payload.get("top_p", 1.0)
+         max_output_tokens = payload.get("max_output_tokens", 4096)
+
+         if not message_text or not image_input:
+             return {"error": "Both 'message' and 'image' are required in the payload"}
+
+         # Generate response
+         result = generate_response(
+             message_text=message_text,
+             image_input=image_input,
+             temperature=temperature,
+             top_p=top_p,
+             max_output_tokens=max_output_tokens
+         )
+
+         return result
+
+     except Exception as e:
+         return {"error": f"Query failed: {str(e)}"}
+
+ # Additional utility endpoints
+ def health_check():
+     """Health check endpoint"""
+     return {
+         "status": "healthy",
+         "model_initialized": model_initialized,
+         "cuda_available": torch.cuda.is_available()
+     }
+
+ def get_model_info():
+     """Get model information"""
+     if not model_initialized:
+         return {"error": "Model not initialized"}
+
+     return {
+         "model_path": args.model_path if args else "Unknown",
+         "model_type": "PULSE-7B",
+         "cuda_available": torch.cuda.is_available(),
+         "device": str(model.device) if model else "Unknown"
+     }
+
+ # For backward compatibility and testing
+ if __name__ == "__main__":
+     import argparse
+
+     argparser = argparse.ArgumentParser()
+     argparser.add_argument("--server_name", default="0.0.0.0", type=str)
+     argparser.add_argument("--port", default="6123", type=str)
+     argparser.add_argument("--model_path", default="PULSE-ECG/PULSE-7B", type=str)
+     argparser.add_argument("--model-base", type=str, default=None)
+     argparser.add_argument("--num-gpus", type=int, default=1)
+     argparser.add_argument("--conv-mode", type=str, default=None)
+     argparser.add_argument("--temperature", type=float, default=0.05)
+     argparser.add_argument("--max-new-tokens", type=int, default=1024)
+     argparser.add_argument("--num_frames", type=int, default=16)
+     argparser.add_argument("--load-8bit", action="store_true")
+     argparser.add_argument("--load-4bit", action="store_true")
+     argparser.add_argument("--debug", action="store_true")
+
+     args = argparser.parse_args()
+
+     model_path = args.model_path
+     filt_invalid = "cut"
+     model_name = get_model_name_from_path(args.model_path)
+     tokenizer, model, image_processor, context_len = load_pretrained_model(
+         args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
+     )
+     print("### image_processor", image_processor)
+     print("### tokenizer", tokenizer)
+     model = model.to(torch.device('cuda'))
+
+     print("Model initialized successfully!")
+     print("This handler is now ready for Hugging Face endpoints.")
+     print("Use the 'query' function as the main endpoint.")