CanerDedeoglu
/

Rapid_ECG

@@ -327,70 +327,38 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
             return {"error": f"Image processing failed: {str(e)}"}
         # Prepare conversation - reset for each request to avoid history issues
-        print(f"[DEBUG] Resetting conversation for new request...")
         try:
             if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
                 our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
-                print(f"[DEBUG] Conversation reset using conv_mode: {our_chatbot.conv_mode}")
             else:
-                # Create a completely new conversation instance
-                from llava.conversation import conv_templates, SeparatorStyle
-                default_conv_mode = "llava_v1"
-                if default_conv_mode in conv_templates:
-                    our_chatbot.conversation = conv_templates[default_conv_mode].copy()
-                    print(f"[DEBUG] Conversation reset using default conv_mode: {default_conv_mode}")
-                else:
-                    # Fallback: create minimal conversation
-                    our_chatbot.conversation.messages = []
-                    print(f"[DEBUG] Conversation reset using fallback method")
         except Exception as e:
             print(f"[DEBUG] Failed to reset conversation: {e}")
-            # Emergency fallback: clear messages
-            try:
-                our_chatbot.conversation.messages = []
-                print(f"[DEBUG] Emergency conversation reset completed")
-            except:
-                print(f"[DEBUG] Emergency conversation reset failed")
         inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
-        print(f"[DEBUG] Conversation roles: {our_chatbot.conversation.roles}")
-        print(f"[DEBUG] Adding user message: {inp[:100]}...")
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
         prompt = our_chatbot.conversation.get_prompt()
-        print(f"[DEBUG] Generated prompt length: {len(prompt)}")
-        print(f"[DEBUG] Prompt preview: {prompt[:200]}...")
         # Tokenize input
         input_ids = tokenizer_image_token(
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
-        # Set up stopping criteria - make it more flexible
         stop_str = (
             our_chatbot.conversation.sep
             if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
             else our_chatbot.conversation.sep2
         )
-        print(f"[DEBUG] Original stop_str: {stop_str}")
-        # Use more flexible stopping criteria to allow longer responses
-        keywords = [stop_str] if stop_str else []
-        if not keywords:
-            # If no separator, use common end tokens
-            keywords = ["</s>", "<s>", "Human:", "Assistant:"]
-        print(f"[DEBUG] Using keywords for stopping: {keywords}")
         stopping_criteria = KeywordsStoppingCriteria(
             keywords, our_chatbot.tokenizer, input_ids
         )
         # Generate response
-        print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
-        print(f"[DEBUG] Stopping criteria: {stop_str}")
         with torch.no_grad():
             outputs = our_chatbot.model.generate(
                 inputs=input_ids,
@@ -415,22 +383,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
             response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
-            # Validate response
-            if not response or not response.strip():
-                print(f"[DEBUG] Empty response detected, trying alternative decoding...")
-                # Try decoding without skip_special_tokens
-                response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=False)
-                if not response or not response.strip():
-                    print(f"[DEBUG] Still empty response, checking raw outputs...")
-                    # Check if outputs are valid
-                    raw_response = our_chatbot.tokenizer.decode(outputs[0], skip_special_tokens=True)
-                    print(f"[DEBUG] Raw response: {raw_response[:200]}...")
-                    # Extract only the new part
-                    response = raw_response[len(our_chatbot.tokenizer.decode(input_ids[0], skip_special_tokens=True)):]
-                    response = response.strip()
-            print(f"[DEBUG] Final response: {response[:100]}...")
             print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
             if len(our_chatbot.conversation.messages) > 0:
                 last_message = our_chatbot.conversation.messages[-1]
@@ -448,9 +400,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
                 our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
             print(f"[DEBUG] Generated response length: {len(response)}")
-            print(f"[DEBUG] Response word count: {len(response.split())}")
-            print(f"[DEBUG] Response preview: {response[:100]}...")
-            print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
         except Exception as e:
             print(f"[DEBUG] Response decoding error: {str(e)}")
             return {"error": f"Response decoding failed: {str(e)}"}
@@ -692,4 +641,4 @@ if __name__ == "__main__":
     print("Handler module loaded successfully!")
     print("This handler is now ready for Hugging Face endpoints.")
     print("Use the 'query' function as the main endpoint.")
-    print("Or use EndpointHandler class for Hugging Face compatibility.")

             return {"error": f"Image processing failed: {str(e)}"}
         # Prepare conversation - reset for each request to avoid history issues
         try:
             if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
                 our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
             else:
+                # Use default conversation template
+                our_chatbot.conversation = our_chatbot.conversation.__class__()
         except Exception as e:
             print(f"[DEBUG] Failed to reset conversation: {e}")
+            # Continue with existing conversation
         inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
         prompt = our_chatbot.conversation.get_prompt()
         # Tokenize input
         input_ids = tokenizer_image_token(
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
+        # Set up stopping criteria
         stop_str = (
             our_chatbot.conversation.sep
             if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
             else our_chatbot.conversation.sep2
         )
+        keywords = [stop_str]
         stopping_criteria = KeywordsStoppingCriteria(
             keywords, our_chatbot.tokenizer, input_ids
         )
         # Generate response
         with torch.no_grad():
             outputs = our_chatbot.model.generate(
                 inputs=input_ids,
             response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
             print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
             if len(our_chatbot.conversation.messages) > 0:
                 last_message = our_chatbot.conversation.messages[-1]
                 our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
             print(f"[DEBUG] Generated response length: {len(response)}")
         except Exception as e:
             print(f"[DEBUG] Response decoding error: {str(e)}")
             return {"error": f"Response decoding failed: {str(e)}"}
     print("Handler module loaded successfully!")
     print("This handler is now ready for Hugging Face endpoints.")
     print("Use the 'query' function as the main endpoint.")
+    print("Or use EndpointHandler class for Hugging Face compatibility.")