handler py
#31
by
ismailhakki37
- opened
- handler.py +6 -57
handler.py
CHANGED
|
@@ -327,70 +327,38 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
|
|
| 327 |
return {"error": f"Image processing failed: {str(e)}"}
|
| 328 |
|
| 329 |
# Prepare conversation - reset for each request to avoid history issues
|
| 330 |
-
print(f"[DEBUG] Resetting conversation for new request...")
|
| 331 |
try:
|
| 332 |
if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
|
| 333 |
our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
|
| 334 |
-
print(f"[DEBUG] Conversation reset using conv_mode: {our_chatbot.conv_mode}")
|
| 335 |
else:
|
| 336 |
-
#
|
| 337 |
-
|
| 338 |
-
default_conv_mode = "llava_v1"
|
| 339 |
-
if default_conv_mode in conv_templates:
|
| 340 |
-
our_chatbot.conversation = conv_templates[default_conv_mode].copy()
|
| 341 |
-
print(f"[DEBUG] Conversation reset using default conv_mode: {default_conv_mode}")
|
| 342 |
-
else:
|
| 343 |
-
# Fallback: create minimal conversation
|
| 344 |
-
our_chatbot.conversation.messages = []
|
| 345 |
-
print(f"[DEBUG] Conversation reset using fallback method")
|
| 346 |
except Exception as e:
|
| 347 |
print(f"[DEBUG] Failed to reset conversation: {e}")
|
| 348 |
-
#
|
| 349 |
-
try:
|
| 350 |
-
our_chatbot.conversation.messages = []
|
| 351 |
-
print(f"[DEBUG] Emergency conversation reset completed")
|
| 352 |
-
except:
|
| 353 |
-
print(f"[DEBUG] Emergency conversation reset failed")
|
| 354 |
|
| 355 |
inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
|
| 356 |
-
print(f"[DEBUG] Conversation roles: {our_chatbot.conversation.roles}")
|
| 357 |
-
print(f"[DEBUG] Adding user message: {inp[:100]}...")
|
| 358 |
-
|
| 359 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
|
| 360 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
|
| 361 |
-
|
| 362 |
prompt = our_chatbot.conversation.get_prompt()
|
| 363 |
-
print(f"[DEBUG] Generated prompt length: {len(prompt)}")
|
| 364 |
-
print(f"[DEBUG] Prompt preview: {prompt[:200]}...")
|
| 365 |
|
| 366 |
# Tokenize input
|
| 367 |
input_ids = tokenizer_image_token(
|
| 368 |
prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
|
| 369 |
).unsqueeze(0).to(our_chatbot.model.device)
|
| 370 |
|
| 371 |
-
# Set up stopping criteria
|
| 372 |
stop_str = (
|
| 373 |
our_chatbot.conversation.sep
|
| 374 |
if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
|
| 375 |
else our_chatbot.conversation.sep2
|
| 376 |
)
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
# Use more flexible stopping criteria to allow longer responses
|
| 380 |
-
keywords = [stop_str] if stop_str else []
|
| 381 |
-
if not keywords:
|
| 382 |
-
# If no separator, use common end tokens
|
| 383 |
-
keywords = ["</s>", "<s>", "Human:", "Assistant:"]
|
| 384 |
-
|
| 385 |
-
print(f"[DEBUG] Using keywords for stopping: {keywords}")
|
| 386 |
stopping_criteria = KeywordsStoppingCriteria(
|
| 387 |
keywords, our_chatbot.tokenizer, input_ids
|
| 388 |
)
|
| 389 |
|
| 390 |
# Generate response
|
| 391 |
-
print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
|
| 392 |
-
print(f"[DEBUG] Stopping criteria: {stop_str}")
|
| 393 |
-
|
| 394 |
with torch.no_grad():
|
| 395 |
outputs = our_chatbot.model.generate(
|
| 396 |
inputs=input_ids,
|
|
@@ -415,22 +383,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
|
|
| 415 |
|
| 416 |
response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
|
| 417 |
|
| 418 |
-
# Validate response
|
| 419 |
-
if not response or not response.strip():
|
| 420 |
-
print(f"[DEBUG] Empty response detected, trying alternative decoding...")
|
| 421 |
-
# Try decoding without skip_special_tokens
|
| 422 |
-
response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=False)
|
| 423 |
-
if not response or not response.strip():
|
| 424 |
-
print(f"[DEBUG] Still empty response, checking raw outputs...")
|
| 425 |
-
# Check if outputs are valid
|
| 426 |
-
raw_response = our_chatbot.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 427 |
-
print(f"[DEBUG] Raw response: {raw_response[:200]}...")
|
| 428 |
-
# Extract only the new part
|
| 429 |
-
response = raw_response[len(our_chatbot.tokenizer.decode(input_ids[0], skip_special_tokens=True)):]
|
| 430 |
-
response = response.strip()
|
| 431 |
-
|
| 432 |
-
print(f"[DEBUG] Final response: {response[:100]}...")
|
| 433 |
-
|
| 434 |
print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
|
| 435 |
if len(our_chatbot.conversation.messages) > 0:
|
| 436 |
last_message = our_chatbot.conversation.messages[-1]
|
|
@@ -448,9 +400,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
|
|
| 448 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
|
| 449 |
|
| 450 |
print(f"[DEBUG] Generated response length: {len(response)}")
|
| 451 |
-
print(f"[DEBUG] Response word count: {len(response.split())}")
|
| 452 |
-
print(f"[DEBUG] Response preview: {response[:100]}...")
|
| 453 |
-
print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
|
| 454 |
except Exception as e:
|
| 455 |
print(f"[DEBUG] Response decoding error: {str(e)}")
|
| 456 |
return {"error": f"Response decoding failed: {str(e)}"}
|
|
@@ -692,4 +641,4 @@ if __name__ == "__main__":
|
|
| 692 |
print("Handler module loaded successfully!")
|
| 693 |
print("This handler is now ready for Hugging Face endpoints.")
|
| 694 |
print("Use the 'query' function as the main endpoint.")
|
| 695 |
-
print("Or use EndpointHandler class for Hugging Face compatibility.")
|
|
|
|
| 327 |
return {"error": f"Image processing failed: {str(e)}"}
|
| 328 |
|
| 329 |
# Prepare conversation - reset for each request to avoid history issues
|
|
|
|
| 330 |
try:
|
| 331 |
if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
|
| 332 |
our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
|
|
|
|
| 333 |
else:
|
| 334 |
+
# Use default conversation template
|
| 335 |
+
our_chatbot.conversation = our_chatbot.conversation.__class__()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
except Exception as e:
|
| 337 |
print(f"[DEBUG] Failed to reset conversation: {e}")
|
| 338 |
+
# Continue with existing conversation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
|
|
|
|
|
|
|
|
|
|
| 341 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
|
| 342 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
|
|
|
|
| 343 |
prompt = our_chatbot.conversation.get_prompt()
|
|
|
|
|
|
|
| 344 |
|
| 345 |
# Tokenize input
|
| 346 |
input_ids = tokenizer_image_token(
|
| 347 |
prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
|
| 348 |
).unsqueeze(0).to(our_chatbot.model.device)
|
| 349 |
|
| 350 |
+
# Set up stopping criteria
|
| 351 |
stop_str = (
|
| 352 |
our_chatbot.conversation.sep
|
| 353 |
if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
|
| 354 |
else our_chatbot.conversation.sep2
|
| 355 |
)
|
| 356 |
+
keywords = [stop_str]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
stopping_criteria = KeywordsStoppingCriteria(
|
| 358 |
keywords, our_chatbot.tokenizer, input_ids
|
| 359 |
)
|
| 360 |
|
| 361 |
# Generate response
|
|
|
|
|
|
|
|
|
|
| 362 |
with torch.no_grad():
|
| 363 |
outputs = our_chatbot.model.generate(
|
| 364 |
inputs=input_ids,
|
|
|
|
| 383 |
|
| 384 |
response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
|
| 387 |
if len(our_chatbot.conversation.messages) > 0:
|
| 388 |
last_message = our_chatbot.conversation.messages[-1]
|
|
|
|
| 400 |
our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
|
| 401 |
|
| 402 |
print(f"[DEBUG] Generated response length: {len(response)}")
|
|
|
|
|
|
|
|
|
|
| 403 |
except Exception as e:
|
| 404 |
print(f"[DEBUG] Response decoding error: {str(e)}")
|
| 405 |
return {"error": f"Response decoding failed: {str(e)}"}
|
|
|
|
| 641 |
print("Handler module loaded successfully!")
|
| 642 |
print("This handler is now ready for Hugging Face endpoints.")
|
| 643 |
print("Use the 'query' function as the main endpoint.")
|
| 644 |
+
print("Or use EndpointHandler class for Hugging Face compatibility.")
|