stopping edited
#29 by ismailhakki37 · opened

handler.py CHANGED (+17 -3)
@@ -347,18 +347,29 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
 
-        # Set up stopping criteria
+        # Set up stopping criteria - make it more flexible
         stop_str = (
             our_chatbot.conversation.sep
             if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
             else our_chatbot.conversation.sep2
         )
-        keywords = [stop_str]
+        print(f"[DEBUG] Original stop_str: {stop_str}")
+
+        # Use more flexible stopping criteria to allow longer responses
+        keywords = [stop_str] if stop_str else []
+        if not keywords:
+            # If no separator, use common end tokens
+            keywords = ["</s>", "<s>", "Human:", "Assistant:"]
+
+        print(f"[DEBUG] Using keywords for stopping: {keywords}")
         stopping_criteria = KeywordsStoppingCriteria(
             keywords, our_chatbot.tokenizer, input_ids
         )
 
         # Generate response
+        print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
+        print(f"[DEBUG] Stopping criteria: {stop_str}")
+
        with torch.no_grad():
            outputs = our_chatbot.model.generate(
                inputs=input_ids,
@@ -400,6 +411,9 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
 
         print(f"[DEBUG] Generated response length: {len(response)}")
+        print(f"[DEBUG] Response word count: {len(response.split())}")
+        print(f"[DEBUG] Response preview: {response[:100]}...")
+        print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
     except Exception as e:
         print(f"[DEBUG] Response decoding error: {str(e)}")
         return {"error": f"Response decoding failed: {str(e)}"}
@@ -641,4 +655,4 @@ if __name__ == "__main__":
     print("Handler module loaded successfully!")
     print("This handler is now ready for Hugging Face endpoints.")
     print("Use the 'query' function as the main endpoint.")
-    print("Or use EndpointHandler class for Hugging Face compatibility.")
+    print("Or use EndpointHandler class for Hugging Face compatibility.")
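Note on the mechanism this patch touches: KeywordsStoppingCriteria is not defined in this diff, but it is presumably a transformers StoppingCriteria subclass that decodes the tokens generated after the prompt and halts generation as soon as any of the given keywords appears (the conversation separator, or one of the fallback end tokens this patch adds). Below is a minimal sketch of such a keyword stopper, assuming the standard Hugging Face StoppingCriteria API; the class name KeywordStopper and its internals are illustrative, not the project's actual code.

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class KeywordStopper(StoppingCriteria):
    """Stop generation once any keyword shows up in the decoded continuation."""

    def __init__(self, keywords, tokenizer, input_ids):
        self.keywords = keywords
        self.tokenizer = tokenizer
        # Remember the prompt length so only newly generated tokens are scanned.
        self.prompt_len = input_ids.shape[1]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Decode the continuation only (batch size 1 assumed, as in the handler).
        # Special tokens are kept so fallbacks such as "</s>" can still match.
        new_text = self.tokenizer.decode(input_ids[0, self.prompt_len:])
        return any(kw in new_text for kw in self.keywords)

# Hypothetical usage mirroring the handler's call site:
# criteria = StoppingCriteriaList([KeywordStopper(keywords, our_chatbot.tokenizer, input_ids)])
# outputs = our_chatbot.model.generate(
#     inputs=input_ids, stopping_criteria=criteria, max_new_tokens=max_output_tokens
# )

This also shows why the patch's fallback list matters: with an empty keywords list the criterion never fires, so generation would only stop at the model's EOS token or at max_new_tokens.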