stopping edited
#29 by ismailhakki37 · opened

handler.py CHANGED (+17 -3)
@@ -347,18 +347,29 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
             prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
         ).unsqueeze(0).to(our_chatbot.model.device)
 
-        # Set up stopping criteria
+        # Set up stopping criteria - make it more flexible
         stop_str = (
             our_chatbot.conversation.sep
             if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
             else our_chatbot.conversation.sep2
         )
-        keywords = [stop_str]
+        print(f"[DEBUG] Original stop_str: {stop_str}")
+
+        # Use more flexible stopping criteria to allow longer responses
+        keywords = [stop_str] if stop_str else []
+        if not keywords:
+            # If no separator, use common end tokens
+            keywords = ["</s>", "<s>", "Human:", "Assistant:"]
+
+        print(f"[DEBUG] Using keywords for stopping: {keywords}")
         stopping_criteria = KeywordsStoppingCriteria(
             keywords, our_chatbot.tokenizer, input_ids
         )
 
         # Generate response
+        print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
+        print(f"[DEBUG] Stopping criteria: {stop_str}")
+
        with torch.no_grad():
            outputs = our_chatbot.model.generate(
                inputs=input_ids,
@@ -400,6 +411,9 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
         our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
 
         print(f"[DEBUG] Generated response length: {len(response)}")
+        print(f"[DEBUG] Response word count: {len(response.split())}")
+        print(f"[DEBUG] Response preview: {response[:100]}...")
+        print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
     except Exception as e:
         print(f"[DEBUG] Response decoding error: {str(e)}")
         return {"error": f"Response decoding failed: {str(e)}"}
@@ -641,4 +655,4 @@ if __name__ == "__main__":
     print("Handler module loaded successfully!")
     print("This handler is now ready for Hugging Face endpoints.")
     print("Use the 'query' function as the main endpoint.")
-    print("Or use EndpointHandler class for Hugging Face compatibility.")
+    print("Or use EndpointHandler class for Hugging Face compatibility.")
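Note on the mechanism this patch touches: KeywordsStoppingCriteria is not defined in this diff, but it is presumably a transformers StoppingCriteria subclass that decodes the tokens generated after the prompt and halts generation as soon as any of the given keywords appears (the conversation separator, or one of the fallback end tokens this patch adds). Below is a minimal sketch of such a keyword stopper, assuming the standard Hugging Face StoppingCriteria API; the class name KeywordStopper and its internals are illustrative, not the project's actual code.

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class KeywordStopper(StoppingCriteria):
    """Stop generation once any keyword shows up in the decoded continuation."""

    def __init__(self, keywords, tokenizer, input_ids):
        self.keywords = keywords
        self.tokenizer = tokenizer
        # Remember the prompt length so only newly generated tokens are scanned.
        self.prompt_len = input_ids.shape[1]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Decode the continuation only (batch size 1 assumed, as in the handler).
        # Special tokens are kept so fallbacks such as "</s>" can still match.
        new_text = self.tokenizer.decode(input_ids[0, self.prompt_len:])
        return any(kw in new_text for kw in self.keywords)

# Hypothetical usage mirroring the handler's call site:
# criteria = StoppingCriteriaList([KeywordStopper(keywords, our_chatbot.tokenizer, input_ids)])
# outputs = our_chatbot.model.generate(
#     inputs=input_ids, stopping_criteria=criteria, max_new_tokens=max_output_tokens
# )

This also shows why the patch's fallback list matters: with an empty keywords list the criterion never fires, so generation would only stop at the model's EOS token or at max_new_tokens.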