CanerDedeoglu
/

Rapid_ECG

@@ -1,9 +1,3 @@
-# -*- coding: utf-8 -*-
-# handler.py — PULSE-7B / LLaVA robust endpoint
-# - Safe decode (empty output fix)
-# - PAD/EOS safety
-# - Hugging Face endpoint compatible
 import os
 import datetime
 import torch
@@ -49,7 +43,7 @@ except ImportError as e:
 # Try to import transformers
 try:
-    from transformers import TextStreamer, TextIteratorStreamer, GenerationConfig
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
@@ -81,7 +75,7 @@ external_log_dir = "./logs"
 LOGDIR = external_log_dir
 VOTEDIR = "./votes"
-# Global variables
 tokenizer = None
 model = None
 image_processor = None
@@ -121,7 +115,7 @@ def vote_last_response(state, vote_type, model_selector):
 def is_valid_video_filename(name):
     if not CV2_AVAILABLE:
-        return False
     video_extensions = ["avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"]
     ext = name.split(".")[-1].lower()
     return ext in video_extensions
@@ -133,7 +127,8 @@ def is_valid_image_filename(name):
 def sample_frames(video_file, num_frames):
     if not CV2_AVAILABLE:
-        raise ImportError("cv2 not available")
     video = cv2.VideoCapture(video_file)
     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
     interval = total_frames // num_frames
@@ -149,32 +144,46 @@ def sample_frames(video_file, num_frames):
     return frames
 def load_image(image_file):
-    if image_file.startswith("http"):
         response = requests.get(image_file)
         if response.status_code == 200:
             image = Image.open(BytesIO(response.content)).convert("RGB")
         else:
             raise ValueError("Failed to load image from URL")
     else:
         image = Image.open(image_file).convert("RGB")
     return image
 def process_base64_image(base64_string):
-    if base64_string.startswith('data:image'):
-        base64_string = base64_string.split(',')[1]
-    image_data = base64.b64decode(base64_string)
-    image = Image.open(BytesIO(image_data)).convert("RGB")
-    return image
 def process_image_input(image_input):
     if isinstance(image_input, str):
         if image_input.startswith("http"):
             return load_image(image_input)
         elif os.path.exists(image_input):
             return load_image(image_input)
         else:
             return process_base64_image(image_input)
     elif isinstance(image_input, dict) and "image" in image_input:
         return process_base64_image(image_input["image"])
     else:
         raise ValueError("Unsupported image input format")
@@ -185,9 +194,14 @@ class InferenceDemo(object):
             raise ImportError("LLaVA modules not available")
         disable_torch_init()
         self.tokenizer, self.model, self.image_processor, self.context_len = (
-            tokenizer, model, image_processor, context_len
         )
         model_name = get_model_name_from_path(model_path)
         if "llama-2" in model_name.lower():
             conv_mode = "llava_llama_2"
@@ -199,8 +213,13 @@ class InferenceDemo(object):
             conv_mode = "qwen_1_5"
         else:
             conv_mode = "llava_v0"
         if args.conv_mode is not None and conv_mode != args.conv_mode:
-            print(f"[WARNING] auto inferred conv_mode={conv_mode}, using {args.conv_mode}")
         else:
             args.conv_mode = conv_mode
         self.conv_mode = conv_mode
@@ -210,11 +229,14 @@ class InferenceDemo(object):
 class ChatSessionManager:
     def __init__(self):
         self.chatbot_instance = None
     def initialize_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
         self.chatbot_instance = InferenceDemo(args, model_path, tokenizer, model, image_processor, context_len)
         print(f"Initialized Chatbot instance with ID: {id(self.chatbot_instance)}")
     def reset_chatbot(self):
         self.chatbot_instance = None
     def get_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
         if self.chatbot_instance is None:
             self.initialize_chatbot(args, model_path, tokenizer, model, image_processor, context_len)
@@ -223,139 +245,242 @@ class ChatSessionManager:
 chat_manager = ChatSessionManager()
 def clear_history():
     if not LLAVA_AVAILABLE:
-        return {"error": "LLaVA not available"}
     try:
         chatbot_instance = chat_manager.get_chatbot(args, args.model_path if args else "PULSE-ECG/PULSE-7B", tokenizer, model, image_processor, context_len)
-        mode = getattr(chatbot_instance, 'conv_mode', None)
-        if mode and LLAVA_AVAILABLE and mode in conv_templates:
-            chatbot_instance.conversation = conv_templates[mode].copy()
-        else:
-            chatbot_instance.conversation = chatbot_instance.conversation.__class__()
-        return {"status": "success", "message": "Conversation cleared"}
     except Exception as e:
         return {"error": f"Failed to clear history: {str(e)}"}
-def _strip_prefix_relaxed(text: str, prefix: str) -> str:
-    try:
-        if text.startswith(prefix):
-            return text[len(prefix):]
-        t_norm = " ".join(text.split())
-        p_norm = " ".join(prefix.split())
-        if t_norm.startswith(p_norm):
-            idx = text.find(prefix.splitlines()[0]) if prefix.splitlines() else -1
-            if idx >= 0:
-                return text[idx + len(prefix.splitlines()[0]):]
-    except Exception:
-        pass
-    return text
 def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, max_output_tokens=4096, repetition_penalty=1.0, conv_mode_override=None):
     if not LLAVA_AVAILABLE:
-        return {"error": "LLaVA not available"}
     try:
         if not message_text or not image_input:
-            return {"error": "Both message and image required"}
         our_chatbot = chat_manager.get_chatbot(args, args.model_path if args else "PULSE-ECG/PULSE-7B", tokenizer, model, image_processor, context_len)
-        image = process_image_input(image_input)
         img_byte_arr = BytesIO()
         image.save(img_byte_arr, format='JPEG')
-        image_hash = hashlib.md5(img_byte_arr.getvalue()).hexdigest()
         t = datetime.datetime.now()
-        filename = os.path.join(LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{image_hash}.jpg")
-        os.makedirs(os.path.dirname(filename), exist_ok=True)
-        image.save(filename)
-        processed_images = process_images([image], our_chatbot.image_processor, our_chatbot.model.config)
-        image_tensor = processed_images[0].half().to(our_chatbot.model.device).unsqueeze(0)
-        if conv_mode_override:
-            our_chatbot.conversation = conv_templates[conv_mode_override].copy()
-        else:
-            our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
         inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
         prompt = our_chatbot.conversation.get_prompt()
-        input_ids = tokenizer_image_token(prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(our_chatbot.model.device)
-        stop_str = our_chatbot.conversation.sep if our_chatbot.conversation.sep_style != SeparatorStyle.TWO else our_chatbot.conversation.sep2
-        stopping_criteria = KeywordsStoppingCriteria([stop_str], our_chatbot.tokenizer, input_ids)
-        pad_id = our_chatbot.tokenizer.pad_token_id
-        eos_id = our_chatbot.tokenizer.eos_token_id if our_chatbot.tokenizer.eos_token_id is not None else pad_id
-        gen_cfg = GenerationConfig(
-            do_sample=True, temperature=float(temperature), top_p=float(top_p),
-            max_new_tokens=int(max_output_tokens), repetition_penalty=float(repetition_penalty),
-            pad_token_id=pad_id, eos_token_id=eos_id
         )
         with torch.no_grad():
             outputs = our_chatbot.model.generate(
                 inputs=input_ids,
                 images=image_tensor,
-                generation_config=gen_cfg,
-                use_cache=True,
                 stopping_criteria=[stopping_criteria],
-                return_dict_in_generate=True
             )
-        sequences = outputs.sequences
-        gen_ids = sequences[0]
-        full_text = our_chatbot.tokenizer.decode(gen_ids, skip_special_tokens=True)
-        prompt_text = our_chatbot.tokenizer.decode(input_ids[0], skip_special_tokens=True)
-        if gen_ids.shape[0] > input_ids.shape[1]:
-            response = our_chatbot.tokenizer.decode(gen_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
-        else:
-            response = _strip_prefix_relaxed(full_text, prompt_text).strip()
-        if not response:
-            response = full_text.replace(stop_str, "").strip()
-        our_chatbot.conversation.messages[-1][-1] = response
         history = [(message_text, response)]
         with open(get_conv_log_filename(), "a") as fout:
-            fout.write(json.dumps({
-                "type": "chat", "model": "PULSE-7b", "state": history,
-                "images": [image_hash], "images_path": [filename]
-            }) + "\n")
-        return {"status": "success", "response": response, "conversation_id": id(our_chatbot.conversation)}
     except Exception as e:
         return {"error": f"Generation failed: {str(e)}"}
 def upvote_last_response(conversation_id):
     try:
         vote_last_response({"conversation_id": conversation_id}, "upvote", "PULSE-7B")
-        return {"status": "success", "message": "Upvoted"}
     except Exception as e:
-        return {"error": str(e)}
 def downvote_last_response(conversation_id):
     try:
         vote_last_response({"conversation_id": conversation_id}, "downvote", "PULSE-7B")
-        return {"status": "success", "message": "Downvoted"}
     except Exception as e:
-        return {"error": str(e)}
 def flag_response(conversation_id):
     try:
         vote_last_response({"conversation_id": conversation_id}, "flag", "PULSE-7B")
-        return {"status": "success", "message": "Flagged"}
     except Exception as e:
-        return {"error": str(e)}
 def initialize_model():
     global tokenizer, model, image_processor, context_len, args
     if not LLAVA_AVAILABLE:
-        print("LLaVA not available")
         return False
     try:
         class Args:
             def __init__(self):
                 self.model_path = "PULSE-ECG/PULSE-7B"
@@ -368,45 +493,95 @@ def initialize_model():
                 self.load_8bit = False
                 self.load_4bit = False
                 self.debug = False
         args = Args()
-        tok, mdl, img_proc, ctx_len = load_pretrained_model(args.model_path, args.model_base, get_model_name_from_path(args.model_path), args.load_8bit, args.load_4bit)
-        if tok.eos_token_id is None:
-            tok.add_special_tokens({"eos_token": "</s>"})
-        if tok.pad_token_id is None:
-            tok.pad_token = tok.eos_token
-        tokenizer, model, image_processor, context_len = tok, mdl, img_proc, ctx_len
         if torch.cuda.is_available():
             model = model.to(torch.device('cuda'))
         return True
     except Exception as e:
-        print(f"Init model fail: {e}")
         return False
 model_initialized = False
 def query(payload):
     global model_initialized
     if not model_initialized:
         model_initialized = initialize_model()
         if not model_initialized:
-            return {"error": "Model init failed"}
     try:
-        message_text = payload.get("message") or payload.get("query") or payload.get("prompt") or payload.get("istem") or ""
-        image_input = payload.get("image") or payload.get("image_url") or payload.get("img") or None
         temperature = float(payload.get("temperature", 0.05))
         top_p = float(payload.get("top_p", 1.0))
-        max_output_tokens = int(payload.get("max_output_tokens", payload.get("max_new_tokens", payload.get("max_tokens", 4096))))
         repetition_penalty = float(payload.get("repetition_penalty", 1.0))
         conv_mode_override = payload.get("conv_mode", None)
-        if not message_text.strip():
-            return {"error": "Missing prompt text"}
         if not image_input:
-            return {"error": "Missing image"}
-        return generate_response(message_text, image_input, temperature, top_p, max_output_tokens, repetition_penalty, conv_mode_override)
     except Exception as e:
-        return {"error": str(e)}
 def health_check():
     return {
         "status": "healthy",
         "model_initialized": model_initialized,
@@ -414,26 +589,56 @@ def health_check():
         "llava_available": LLAVA_AVAILABLE,
         "transformers_available": TRANSFORMERS_AVAILABLE,
         "cv2_available": CV2_AVAILABLE,
-        "lazy_loading": True
     }
 def get_model_info():
     if not model_initialized:
-        return {"error": "Not initialized", "lazy_loading": True}
-    return {"model_path": args.model_path if args else "Unknown", "model_type": "PULSE-7B", "cuda_available": torch.cuda.is_available(), "device": str(model.device) if model else "Unknown"}
 class EndpointHandler:
     def __init__(self, model_dir):
         self.model_dir = model_dir
-        print(f"Handler init with model_dir={model_dir}")
     def __call__(self, payload):
         if "inputs" in payload:
-            return query(payload["inputs"])
-        return query(payload)
     def health_check(self):
         return health_check()
     def get_model_info(self):
         return get_model_info()
 if __name__ == "__main__":
-    print("Handler loaded and ready.")

 import os
 import datetime
 import torch
 # Try to import transformers
 try:
+    from transformers import TextStreamer, TextIteratorStreamer
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
 LOGDIR = external_log_dir
 VOTEDIR = "./votes"
+# Global variables for model and tokenizer
 tokenizer = None
 model = None
 image_processor = None
 def is_valid_video_filename(name):
     """Return True when ``name`` ends in a known video extension.

     Always returns False when OpenCV is unavailable, because video
     processing is disabled in that case.  A name without any ``.`` has no
     extension and is rejected (previously the whole name was compared, so a
     file literally called ``mp4`` was accepted).
     """
     if not CV2_AVAILABLE:
         return False  # Video processing disabled
     video_extensions = {"avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg"}
     _, dot, ext = name.rpartition(".")
     return bool(dot) and ext.lower() in video_extensions
 def sample_frames(video_file, num_frames):
     if not CV2_AVAILABLE:
+        raise ImportError("cv2 (OpenCV) not available. Video processing is disabled.")
     video = cv2.VideoCapture(video_file)
     total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
     interval = total_frames // num_frames
     return frames
 def load_image(image_file, timeout=30):
     """Load an image from an HTTP(S) URL or a local path as an RGB PIL image.

     Args:
         image_file: ``http://`` / ``https://`` URL, or a local file path.
         timeout: Seconds to wait for the remote server; without a timeout a
             stalled server would block the request indefinitely.

     Returns:
         The decoded image converted to RGB.

     Raises:
         ValueError: If the URL does not answer with HTTP 200.
     """
     # Require a real scheme so a local file whose name merely starts with
     # "http" is not sent to the network branch.
     if image_file.startswith(("http://", "https://")):
         response = requests.get(image_file, timeout=timeout)
         if response.status_code != 200:
             raise ValueError("Failed to load image from URL")
         return Image.open(BytesIO(response.content)).convert("RGB")
     print("Load image from local file")
     print(image_file)
     return Image.open(image_file).convert("RGB")
 def process_base64_image(base64_string):
     """Decode a base64 string (optionally a ``data:image`` URL) into an RGB image.

     Args:
         base64_string: Raw base64 text, or a ``data:image/...;base64,<data>``
             URL.  Whitespace and line breaks inside the data are ignored.

     Returns:
         The decoded image converted to RGB.

     Raises:
         ValueError: If the input is not valid base64 or not a readable image;
             the underlying exception is chained as ``__cause__``.
     """
     try:
         payload = base64_string
         # Remove data URL prefix if present
         if payload.startswith("data:image"):
             _, comma, payload = payload.partition(",")
             if not comma:
                 raise ValueError("data URL has no ',' before the base64 payload")
         # Drop whitespace, then decode strictly so stray characters are
         # reported instead of being silently discarded.
         payload = "".join(payload.split())
         image_data = base64.b64decode(payload, validate=True)
         return Image.open(BytesIO(image_data)).convert("RGB")
     except Exception as e:
         raise ValueError(f"Failed to process base64 image: {e}") from e
 def process_image_input(image_input):
     """Turn a URL, local path, base64 string, or ``{"image": ...}`` dict into an image.

     A dict is unwrapped and its ``"image"`` value is handled like a direct
     argument, so a wrapped URL or path works the same as a wrapped base64
     string.

     Raises:
         ValueError: If the input is neither a string nor a dict with an
             ``"image"`` key.
     """
     # NOTE(review): URLs and local paths come straight from the request
     # payload; confirm that fetching arbitrary URLs / reading arbitrary local
     # files is acceptable for this deployment.
     if isinstance(image_input, dict) and "image" in image_input:
         return process_image_input(image_input["image"])
     if not isinstance(image_input, str):
         raise ValueError("Unsupported image input format")
     if image_input.startswith("http"):
         return load_image(image_input)
     # A data URL is never a file name, so skip the filesystem probe for it.
     if not image_input.startswith("data:") and os.path.exists(image_input):
         return load_image(image_input)
     # Try to process as base64
     return process_base64_image(image_input)
             raise ImportError("LLaVA modules not available")
         disable_torch_init()
         self.tokenizer, self.model, self.image_processor, self.context_len = (
+            tokenizer,
+            model,
+            image_processor,
+            context_len,
         )
         model_name = get_model_name_from_path(model_path)
         if "llama-2" in model_name.lower():
             conv_mode = "llava_llama_2"
             conv_mode = "qwen_1_5"
         else:
             conv_mode = "llava_v0"
         if args.conv_mode is not None and conv_mode != args.conv_mode:
+            print(
+                "[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format(
+                    conv_mode, args.conv_mode, args.conv_mode
+                )
+            )
         else:
             args.conv_mode = conv_mode
         self.conv_mode = conv_mode
 class ChatSessionManager:
     def __init__(self):
         self.chatbot_instance = None
     def initialize_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
         self.chatbot_instance = InferenceDemo(args, model_path, tokenizer, model, image_processor, context_len)
         print(f"Initialized Chatbot instance with ID: {id(self.chatbot_instance)}")
     def reset_chatbot(self):
         self.chatbot_instance = None
     def get_chatbot(self, args, model_path, tokenizer, model, image_processor, context_len):
         if self.chatbot_instance is None:
             self.initialize_chatbot(args, model_path, tokenizer, model, image_processor, context_len)
 chat_manager = ChatSessionManager()
 def clear_history():
     """Reset the shared chatbot's conversation to a fresh template.

     Returns:
         ``{"status": "success", ...}`` when the conversation was replaced,
         otherwise ``{"error": ...}``.  A failed reset is reported as an error
         (it used to be printed and still answered with success).
     """
     if not LLAVA_AVAILABLE:
         return {"error": "LLaVA modules not available"}
     try:
         chatbot_instance = chat_manager.get_chatbot(args, args.model_path if args else "PULSE-ECG/PULSE-7B", tokenizer, model, image_processor, context_len)
         mode = getattr(chatbot_instance, "conv_mode", None)
         if mode:
             chatbot_instance.conversation = conv_templates[mode].copy()
         else:
             # No known mode: fall back to a default-constructed conversation
             chatbot_instance.conversation = chatbot_instance.conversation.__class__()
         return {"status": "success", "message": "Conversation history cleared"}
     except Exception as e:
         return {"error": f"Failed to clear history: {str(e)}"}
def add_message(message_text, image_input=None):
    """Placeholder endpoint: accepts a message and always reports success.

    Both arguments are currently ignored; nothing is stored.
    """
    acknowledgement = dict(status="success", message="Message added")
    return acknowledgement
 def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, max_output_tokens=4096, repetition_penalty=1.0, conv_mode_override=None):
     """Answer one prompt about one image and return a JSON-serializable dict.

     Every call starts from a fresh conversation template, so no history is
     carried over between requests.  The image is written under
     ``LOGDIR/serve_images`` and the exchange is appended to the conversation
     log; both are uploaded when ``api`` and ``repo_name`` are configured.

     Args:
         message_text: Prompt text; must be non-empty.
         image_input: Anything accepted by ``process_image_input``.
         temperature: Sampling temperature; values <= 0 select greedy decoding.
         top_p: Nucleus-sampling threshold, used only when sampling.
         max_output_tokens: Upper bound on newly generated tokens.
         repetition_penalty: Forwarded to ``model.generate``.
         conv_mode_override: Optional key of ``conv_templates`` to use instead
             of the chatbot's own ``conv_mode``.

     Returns:
         ``{"status": "success", "response": str, "conversation_id": int}`` on
         success, otherwise ``{"error": str}``.
     """
     if not LLAVA_AVAILABLE:
         return {"error": "LLaVA modules not available"}
     try:
         if not message_text or not image_input:
             return {"error": "Both message text and image are required"}
         if conv_mode_override and conv_mode_override not in conv_templates:
             return {"error": f"Unknown conversation mode: {conv_mode_override}"}
         our_chatbot = chat_manager.get_chatbot(args, args.model_path if args else "PULSE-ECG/PULSE-7B", tokenizer, model, image_processor, context_len)
         # Process image input
         try:
             image = process_image_input(image_input)
         except Exception as e:
             return {"error": f"Failed to process image: {str(e)}"}
         # Hash the JPEG encoding so identical images share one log file
         img_byte_arr = BytesIO()
         image.save(img_byte_arr, format='JPEG')
         image_hash = hashlib.md5(img_byte_arr.getvalue()).hexdigest()
         all_image_hash = [image_hash]
         t = datetime.datetime.now()
         filename = os.path.join(
             LOGDIR,
             "serve_images",
             f"{t.year}-{t.month:02d}-{t.day:02d}",
             f"{image_hash}.jpg",
         )
         all_image_path = [filename]
         if not os.path.isfile(filename):
             os.makedirs(os.path.dirname(filename), exist_ok=True)
             print("image save to", filename)
             image.save(filename)
         # Process image for model
         try:
             processed_images = process_images([image], our_chatbot.image_processor, our_chatbot.model.config)
             if len(processed_images) == 0:
                 return {"error": "Image processing returned empty list"}
             image_tensor = processed_images[0].half().to(our_chatbot.model.device).unsqueeze(0)
         except Exception as e:
             print(f"[DEBUG] Image processing error: {str(e)}")
             return {"error": f"Image processing failed: {str(e)}"}
         # Prepare conversation - reset for each request to avoid history issues
         conv_mode = conv_mode_override or getattr(our_chatbot, "conv_mode", None)
         try:
             if conv_mode:
                 our_chatbot.conversation = conv_templates[conv_mode].copy()
             else:
                 # Use default conversation template
                 our_chatbot.conversation = our_chatbot.conversation.__class__()
         except Exception as e:
             # Best effort: continue with the existing conversation
             print(f"[DEBUG] Failed to reset conversation: {e}")
         conversation = our_chatbot.conversation
         inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
         conversation.append_message(conversation.roles[0], inp)
         conversation.append_message(conversation.roles[1], None)
         prompt = conversation.get_prompt()
         # Tokenize input
         input_ids = tokenizer_image_token(
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
         # Set up stopping criteria
         stop_str = (
             conversation.sep
             if conversation.sep_style != SeparatorStyle.TWO
             else conversation.sep2
         )
         stopping_criteria = KeywordsStoppingCriteria(
             [stop_str], our_chatbot.tokenizer, input_ids
         )
         # Sampling needs a strictly positive temperature; otherwise decode
         # greedily and leave the sampling-only knobs out.
         temperature = float(temperature)
         do_sample = temperature > 0
         gen_kwargs = {
             "inputs": input_ids,
             "images": image_tensor,
             "do_sample": do_sample,
             "max_new_tokens": int(max_output_tokens),
             "repetition_penalty": float(repetition_penalty),
             # NOTE(review): the KV cache is disabled here; confirm this is a
             # deliberate workaround, since it slows long generations.
             "use_cache": False,
             "stopping_criteria": [stopping_criteria],
         }
         if do_sample:
             gen_kwargs["temperature"] = temperature
             gen_kwargs["top_p"] = float(top_p)
         # Generate response
         with torch.no_grad():
             outputs = our_chatbot.model.generate(**gen_kwargs)
         # Decode response
         try:
             if len(outputs) == 0:
                 return {"error": "Model generated empty output"}
             gen_ids = outputs[0]
             prompt_len = input_ids.shape[1]
             # NOTE(review): presumably some LLaVA versions echo the prompt ids
             # and others return only new tokens - confirm.  Strip the prompt
             # only when it is really there, so the reply is never sliced away.
             if gen_ids.shape[0] >= prompt_len and torch.equal(gen_ids[:prompt_len], input_ids[0]):
                 gen_ids = gen_ids[prompt_len:]
             response = our_chatbot.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
             if stop_str and response.endswith(stop_str):
                 response = response[:-len(stop_str)].strip()
             messages = conversation.messages
             if messages and isinstance(messages[-1], list) and len(messages[-1]) > 1:
                 messages[-1][-1] = response
             else:
                 # Unexpected message layout: record the reply as a new message
                 conversation.append_message(conversation.roles[1], response)
         except Exception as e:
             print(f"[DEBUG] Response decoding error: {str(e)}")
             return {"error": f"Response decoding failed: {str(e)}"}
         # Log conversation
         history = [(message_text, response)]
         log_filename = get_conv_log_filename()
         with open(log_filename, "a") as fout:
             data = {
                 "type": "chat",
                 "model": "PULSE-7b",
                 "state": history,
                 "images": all_image_hash,
                 "images_path": all_image_path
             }
             print("#### conv log", data)
             fout.write(json.dumps(data) + "\n")
         # Upload files to Hugging Face if configured
         if api and repo_name:
             try:
                 for upload_path in all_image_path + [log_filename]:
                     api.upload_file(
                         path_or_fileobj=upload_path,
                         path_in_repo=upload_path.replace("./logs/", ""),
                         repo_id=repo_name,
                         repo_type="dataset",
                     )
             except Exception as e:
                 # Uploading is best effort and must not fail the request
                 print(f"Failed to upload files: {e}")
         return {
             "status": "success",
             "response": response,
             "conversation_id": id(our_chatbot.conversation)
         }
     except Exception as e:
         return {"error": f"Generation failed: {str(e)}"}
 def upvote_last_response(conversation_id):
     """Record an upvote for the given conversation; return a status dict."""
     vote_state = {"conversation_id": conversation_id}
     try:
         vote_last_response(vote_state, "upvote", "PULSE-7B")
     except Exception as exc:
         return {"error": f"Failed to upvote: {exc}"}
     return {"status": "success", "message": "Thank you for your voting!"}
 def downvote_last_response(conversation_id):
     """Record a downvote for the given conversation; return a status dict."""
     vote_state = {"conversation_id": conversation_id}
     try:
         vote_last_response(vote_state, "downvote", "PULSE-7B")
     except Exception as exc:
         return {"error": f"Failed to downvote: {exc}"}
     return {"status": "success", "message": "Thank you for your voting!"}
 def flag_response(conversation_id):
     """Record a flag for the given conversation; return a status dict."""
     vote_state = {"conversation_id": conversation_id}
     try:
         vote_last_response(vote_state, "flag", "PULSE-7B")
     except Exception as exc:
         return {"error": f"Failed to flag response: {exc}"}
     return {"status": "success", "message": "Response flagged successfully"}
+# Model initialization helper (called lazily from query(), not at import time)
 def initialize_model():
+    """Initialize the model and tokenizer"""
     global tokenizer, model, image_processor, context_len, args
     if not LLAVA_AVAILABLE:
+        print("LLaVA modules not available, skipping model initialization")
         return False
     try:
+        # Set default arguments
         class Args:
             def __init__(self):
                 self.model_path = "PULSE-ECG/PULSE-7B"
                 self.load_8bit = False
                 self.load_4bit = False
                 self.debug = False
         args = Args()
+        # Load model
+        model_path = args.model_path
+        model_name = get_model_name_from_path(args.model_path)
+        tokenizer, model, image_processor, context_len = load_pretrained_model(
+            args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
+        )
+        print("### image_processor", image_processor)
+        print("### tokenizer", tokenizer)
+        # Move model to GPU if available
         if torch.cuda.is_available():
             model = model.to(torch.device('cuda'))
+            print("Model moved to CUDA")
+        else:
+            print("CUDA not available, using CPU")
         return True
     except Exception as e:
+        print(f"Failed to initialize model: {e}")
         return False
+# Don't initialize model on import - do it lazily
 model_initialized = False
+# Main endpoint function for Hugging Face
 def query(payload):
     """Main endpoint function for the Hugging Face inference API.

     Args:
         payload: JSON object with the prompt under ``message`` / ``query`` /
             ``prompt`` / ``istem`` and the image under ``image`` /
             ``image_url`` / ``img``.  Optional generation settings:
             ``temperature``, ``top_p``, ``max_output_tokens`` (or
             ``max_new_tokens`` / ``max_tokens``), ``repetition_penalty`` and
             ``conv_mode``.  A setting sent as ``null`` uses its default.

     Returns:
         The dict produced by ``generate_response``, or ``{"error": ...}``.
     """
     global model_initialized
     # Reject malformed payloads before paying for a model load.
     if not isinstance(payload, dict):
         return {"error": "Query failed: payload must be a JSON object"}
     # Lazy initialization - initialize model on first call
     if not model_initialized:
         print("Initializing model on first query...")
         model_initialized = initialize_model()
         if not model_initialized:
             return {"error": "Model initialization failed"}
     def first_value(keys, default):
         """Return the first non-None value stored under any of ``keys``."""
         for key in keys:
             value = payload.get(key)
             if value is not None:
                 return value
         return default
     try:
         print(f"[DEBUG] query payload keys={list(payload.keys())}")
         # Prompt and image: first non-empty value among the accepted keys
         text_keys = ("message", "query", "prompt", "istem")
         image_keys = ("image", "image_url", "img")
         message_text = next((payload[k] for k in text_keys if payload.get(k)), "")
         image_input = next((payload[k] for k in image_keys if payload.get(k)), None)
         # Generation parameters; 0 is a legitimate value, so test for None
         temperature = float(first_value(("temperature",), 0.05))
         top_p = float(first_value(("top_p",), 1.0))
         max_output_tokens = int(first_value(("max_output_tokens", "max_new_tokens", "max_tokens"), 4096))
         repetition_penalty = float(first_value(("repetition_penalty",), 1.0))
         conv_mode_override = payload.get("conv_mode", None)
         if not isinstance(message_text, str) or not message_text.strip():
             return {"error": "Missing prompt text. Use 'message', 'query', 'prompt', or 'istem' key"}
         if not image_input:
             return {"error": "Missing image. Use 'image', 'image_url', or 'img' key"}
         return generate_response(
             message_text=message_text,
             image_input=image_input,
             temperature=temperature,
             top_p=top_p,
             max_output_tokens=max_output_tokens,
             repetition_penalty=repetition_penalty,
             conv_mode_override=conv_mode_override
         )
     except Exception as e:
         return {"error": f"Query failed: {str(e)}"}
+# Additional utility endpoints
 def health_check():
+    """Health check endpoint"""
     return {
         "status": "healthy",
         "model_initialized": model_initialized,
         "llava_available": LLAVA_AVAILABLE,
         "transformers_available": TRANSFORMERS_AVAILABLE,
         "cv2_available": CV2_AVAILABLE,
+        "lazy_loading": True  # Model will be loaded on first query
     }
 def get_model_info():
     """Describe the loaded model, or report that it has not been loaded yet."""
     if model_initialized:
         device_name = str(model.device) if model else "Unknown"
         path = args.model_path if args else "Unknown"
         return {
             "model_path": path,
             "model_type": "PULSE-7B",
             "cuda_available": torch.cuda.is_available(),
             "device": device_name
         }
     # Nothing has triggered the lazy load so far
     return {
         "error": "Model not initialized yet",
         "lazy_loading": True,
         "note": "Model will be loaded on first query"
     }
+# Hugging Face EndpointHandler class
 class EndpointHandler:
     """Callable wrapper exposing the module-level endpoints to Hugging Face."""
     def __init__(self, model_dir):
         """Remember ``model_dir`` and announce the handler on stdout."""
         print(f"EndpointHandler initialized with model_dir: {model_dir}")
         self.model_dir = model_dir
     def __call__(self, payload):
         """Forward the request to ``query``, unwrapping ``"inputs"`` when present."""
         # Hugging Face wraps the request in "inputs"; a bare payload is still
         # accepted for backward compatibility.
         request = payload["inputs"] if "inputs" in payload else payload
         return query(request)
     def health_check(self):
         """Delegate to the module-level ``health_check``."""
         return health_check()
     def get_model_info(self):
         """Delegate to the module-level ``get_model_info``."""
         return get_model_info()
+# For backward compatibility and testing
 if __name__ == "__main__":
     # Usage banner shown only when the module is run as a script
     for banner_line in (
         "Handler module loaded successfully!",
         "This handler is now ready for Hugging Face endpoints.",
         "Use the 'query' function as the main endpoint.",
         "Or use EndpointHandler class for Hugging Face compatibility.",
     ):
         print(banner_line)