Files changed (1) hide show
  1. handler.py +17 -3
handler.py CHANGED
@@ -347,18 +347,29 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
347
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
348
  ).unsqueeze(0).to(our_chatbot.model.device)
349
 
350
- # Set up stopping criteria
351
  stop_str = (
352
  our_chatbot.conversation.sep
353
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
354
  else our_chatbot.conversation.sep2
355
  )
356
- keywords = [stop_str]
 
 
 
 
 
 
 
 
357
  stopping_criteria = KeywordsStoppingCriteria(
358
  keywords, our_chatbot.tokenizer, input_ids
359
  )
360
 
361
  # Generate response
 
 
 
362
  with torch.no_grad():
363
  outputs = our_chatbot.model.generate(
364
  inputs=input_ids,
@@ -400,6 +411,9 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
400
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
401
 
402
  print(f"[DEBUG] Generated response length: {len(response)}")
 
 
 
403
  except Exception as e:
404
  print(f"[DEBUG] Response decoding error: {str(e)}")
405
  return {"error": f"Response decoding failed: {str(e)}"}
@@ -641,4 +655,4 @@ if __name__ == "__main__":
641
  print("Handler module loaded successfully!")
642
  print("This handler is now ready for Hugging Face endpoints.")
643
  print("Use the 'query' function as the main endpoint.")
644
- print("Or use EndpointHandler class for Hugging Face compatibility.")
 
347
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
348
  ).unsqueeze(0).to(our_chatbot.model.device)
349
 
350
+ # Set up stopping criteria - make it more flexible
351
  stop_str = (
352
  our_chatbot.conversation.sep
353
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
354
  else our_chatbot.conversation.sep2
355
  )
356
+ print(f"[DEBUG] Original stop_str: {stop_str}")
357
+
358
+ # Stop on the conversation separator when one is defined; otherwise fall back to common end tokens
359
+ keywords = [stop_str] if stop_str else []
360
+ if not keywords:
361
+ # If no separator, use common end tokens
362
+ keywords = ["</s>", "<s>", "Human:", "Assistant:"]
363
+
364
+ print(f"[DEBUG] Using keywords for stopping: {keywords}")
365
  stopping_criteria = KeywordsStoppingCriteria(
366
  keywords, our_chatbot.tokenizer, input_ids
367
  )
368
 
369
  # Generate response
370
+ print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
371
+ print(f"[DEBUG] Stopping criteria: {stop_str}")
372
+
373
  with torch.no_grad():
374
  outputs = our_chatbot.model.generate(
375
  inputs=input_ids,
 
411
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
412
 
413
  print(f"[DEBUG] Generated response length: {len(response)}")
414
+ print(f"[DEBUG] Response word count: {len(response.split())}")
415
+ print(f"[DEBUG] Response preview: {response[:100]}...")
416
+ print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
417
  except Exception as e:
418
  print(f"[DEBUG] Response decoding error: {str(e)}")
419
  return {"error": f"Response decoding failed: {str(e)}"}
 
655
  print("Handler module loaded successfully!")
656
  print("This handler is now ready for Hugging Face endpoints.")
657
  print("Use the 'query' function as the main endpoint.")
658
+ print("Or use EndpointHandler class for Hugging Face compatibility.")