CanerDedeoglu
/

Rapid_ECG

Image-Text-to-Text

Model card | Files and versions

stop criteria removed

#33

by ismailhakki37 - opened Aug 22, 2025

base: refs/heads/main

←

from: refs/pr/33

Discussion Files changed

Files changed (1) [hide / show]

handler.py +3 -21

handler.py CHANGED Viewed

@@ -347,26 +347,9 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
-        # Set up stopping criteria - more flexible to allow longer responses
-        stop_str = (
-            our_chatbot.conversation.sep
-            if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
-            else our_chatbot.conversation.sep2
-        )
-        # Use minimal stopping criteria to allow longer responses
-        keywords = []
-        if stop_str and stop_str.strip():
-            keywords.append(stop_str)
-        # Only add very basic stopping criteria to prevent infinite generation
-        if not keywords:
-            keywords = ["</s>", "<s>"]
-        print(f"[DEBUG] Using stopping criteria: {keywords}")
-        stopping_criteria = KeywordsStoppingCriteria(
-            keywords, our_chatbot.tokenizer, input_ids
-        )
         # Generate response
         with torch.no_grad():
@@ -379,7 +362,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
                 max_new_tokens=max_output_tokens,
                 repetition_penalty=repetition_penalty,
                 use_cache=False,
-                stopping_criteria=[stopping_criteria],
                 pad_token_id=our_chatbot.tokenizer.eos_token_id,
                 eos_token_id=our_chatbot.tokenizer.eos_token_id,
                 length_penalty=1.0,  # Don't penalize longer sequences

             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
+        # No stopping criteria - let model generate freely up to max_new_tokens
+        print(f"[DEBUG] No stopping criteria - free generation up to {max_output_tokens} tokens")
+        stopping_criteria = None
         # Generate response
         with torch.no_grad():
                 max_new_tokens=max_output_tokens,
                 repetition_penalty=repetition_penalty,
                 use_cache=False,
                 pad_token_id=our_chatbot.tokenizer.eos_token_id,
                 eos_token_id=our_chatbot.tokenizer.eos_token_id,
                 length_penalty=1.0,  # Don't penalize longer sequences