Files changed (1) hide show
  1. handler.py +3 -21
handler.py CHANGED
@@ -347,26 +347,9 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
347
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
348
  ).unsqueeze(0).to(our_chatbot.model.device)
349
 
350
- # Set up stopping criteria - more flexible to allow longer responses
351
- stop_str = (
352
- our_chatbot.conversation.sep
353
- if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
354
- else our_chatbot.conversation.sep2
355
- )
356
-
357
- # Use minimal stopping criteria to allow longer responses
358
- keywords = []
359
- if stop_str and stop_str.strip():
360
- keywords.append(stop_str)
361
-
362
- # Only add very basic stopping criteria to prevent infinite generation
363
- if not keywords:
364
- keywords = ["</s>", "<s>"]
365
-
366
- print(f"[DEBUG] Using stopping criteria: {keywords}")
367
- stopping_criteria = KeywordsStoppingCriteria(
368
- keywords, our_chatbot.tokenizer, input_ids
369
- )
370
 
371
  # Generate response
372
  with torch.no_grad():
@@ -379,7 +362,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
379
  max_new_tokens=max_output_tokens,
380
  repetition_penalty=repetition_penalty,
381
  use_cache=False,
382
- stopping_criteria=[stopping_criteria],
383
  pad_token_id=our_chatbot.tokenizer.eos_token_id,
384
  eos_token_id=our_chatbot.tokenizer.eos_token_id,
385
  length_penalty=1.0, # Don't penalize longer sequences
 
347
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
348
  ).unsqueeze(0).to(our_chatbot.model.device)
349
 
350
+ # No stopping criteria - let model generate freely up to max_new_tokens
351
+ print(f"[DEBUG] No stopping criteria - free generation up to {max_output_tokens} tokens")
352
+ stopping_criteria = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
  # Generate response
355
  with torch.no_grad():
 
362
  max_new_tokens=max_output_tokens,
363
  repetition_penalty=repetition_penalty,
364
  use_cache=False,
 
365
  pad_token_id=our_chatbot.tokenizer.eos_token_id,
366
  eos_token_id=our_chatbot.tokenizer.eos_token_id,
367
  length_penalty=1.0, # Don't penalize longer sequences