Files changed (1) hide show
  1. handler.py +6 -57
handler.py CHANGED
@@ -327,70 +327,38 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
327
  return {"error": f"Image processing failed: {str(e)}"}
328
 
329
  # Prepare conversation - reset for each request to avoid history issues
330
- print(f"[DEBUG] Resetting conversation for new request...")
331
  try:
332
  if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
333
  our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
334
- print(f"[DEBUG] Conversation reset using conv_mode: {our_chatbot.conv_mode}")
335
  else:
336
- # Create a completely new conversation instance
337
- from llava.conversation import conv_templates, SeparatorStyle
338
- default_conv_mode = "llava_v1"
339
- if default_conv_mode in conv_templates:
340
- our_chatbot.conversation = conv_templates[default_conv_mode].copy()
341
- print(f"[DEBUG] Conversation reset using default conv_mode: {default_conv_mode}")
342
- else:
343
- # Fallback: create minimal conversation
344
- our_chatbot.conversation.messages = []
345
- print(f"[DEBUG] Conversation reset using fallback method")
346
  except Exception as e:
347
  print(f"[DEBUG] Failed to reset conversation: {e}")
348
- # Emergency fallback: clear messages
349
- try:
350
- our_chatbot.conversation.messages = []
351
- print(f"[DEBUG] Emergency conversation reset completed")
352
- except:
353
- print(f"[DEBUG] Emergency conversation reset failed")
354
 
355
  inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
356
- print(f"[DEBUG] Conversation roles: {our_chatbot.conversation.roles}")
357
- print(f"[DEBUG] Adding user message: {inp[:100]}...")
358
-
359
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
360
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
361
-
362
  prompt = our_chatbot.conversation.get_prompt()
363
- print(f"[DEBUG] Generated prompt length: {len(prompt)}")
364
- print(f"[DEBUG] Prompt preview: {prompt[:200]}...")
365
 
366
  # Tokenize input
367
  input_ids = tokenizer_image_token(
368
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
369
  ).unsqueeze(0).to(our_chatbot.model.device)
370
 
371
- # Set up stopping criteria - make it more flexible
372
  stop_str = (
373
  our_chatbot.conversation.sep
374
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
375
  else our_chatbot.conversation.sep2
376
  )
377
- print(f"[DEBUG] Original stop_str: {stop_str}")
378
-
379
- # Use more flexible stopping criteria to allow longer responses
380
- keywords = [stop_str] if stop_str else []
381
- if not keywords:
382
- # If no separator, use common end tokens
383
- keywords = ["</s>", "<s>", "Human:", "Assistant:"]
384
-
385
- print(f"[DEBUG] Using keywords for stopping: {keywords}")
386
  stopping_criteria = KeywordsStoppingCriteria(
387
  keywords, our_chatbot.tokenizer, input_ids
388
  )
389
 
390
  # Generate response
391
- print(f"[DEBUG] Generating with max_new_tokens: {max_output_tokens}")
392
- print(f"[DEBUG] Stopping criteria: {stop_str}")
393
-
394
  with torch.no_grad():
395
  outputs = our_chatbot.model.generate(
396
  inputs=input_ids,
@@ -415,22 +383,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
415
 
416
  response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
417
 
418
- # Validate response
419
- if not response or not response.strip():
420
- print(f"[DEBUG] Empty response detected, trying alternative decoding...")
421
- # Try decoding without skip_special_tokens
422
- response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=False)
423
- if not response or not response.strip():
424
- print(f"[DEBUG] Still empty response, checking raw outputs...")
425
- # Check if outputs are valid
426
- raw_response = our_chatbot.tokenizer.decode(outputs[0], skip_special_tokens=True)
427
- print(f"[DEBUG] Raw response: {raw_response[:200]}...")
428
- # Extract only the new part
429
- response = raw_response[len(our_chatbot.tokenizer.decode(input_ids[0], skip_special_tokens=True)):]
430
- response = response.strip()
431
-
432
- print(f"[DEBUG] Final response: {response[:100]}...")
433
-
434
  print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
435
  if len(our_chatbot.conversation.messages) > 0:
436
  last_message = our_chatbot.conversation.messages[-1]
@@ -448,9 +400,6 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
448
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
449
 
450
  print(f"[DEBUG] Generated response length: {len(response)}")
451
- print(f"[DEBUG] Response word count: {len(response.split())}")
452
- print(f"[DEBUG] Response preview: {response[:100]}...")
453
- print(f"[DEBUG] Response ends with: {response[-50:] if len(response) > 50 else response}")
454
  except Exception as e:
455
  print(f"[DEBUG] Response decoding error: {str(e)}")
456
  return {"error": f"Response decoding failed: {str(e)}"}
@@ -692,4 +641,4 @@ if __name__ == "__main__":
692
  print("Handler module loaded successfully!")
693
  print("This handler is now ready for Hugging Face endpoints.")
694
  print("Use the 'query' function as the main endpoint.")
695
- print("Or use EndpointHandler class for Hugging Face compatibility.")
 
327
  return {"error": f"Image processing failed: {str(e)}"}
328
 
329
  # Prepare conversation - reset for each request to avoid history issues
 
330
  try:
331
  if hasattr(our_chatbot, 'conv_mode') and our_chatbot.conv_mode and LLAVA_AVAILABLE:
332
  our_chatbot.conversation = conv_templates[our_chatbot.conv_mode].copy()
 
333
  else:
334
+ # Use default conversation template
335
+ our_chatbot.conversation = our_chatbot.conversation.__class__()
 
 
 
 
 
 
 
 
336
  except Exception as e:
337
  print(f"[DEBUG] Failed to reset conversation: {e}")
338
+ # Continue with existing conversation
 
 
 
 
 
339
 
340
  inp = DEFAULT_IMAGE_TOKEN + "\n" + message_text
 
 
 
341
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[0], inp)
342
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], None)
 
343
  prompt = our_chatbot.conversation.get_prompt()
 
 
344
 
345
  # Tokenize input
346
  input_ids = tokenizer_image_token(
347
  prompt, our_chatbot.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
348
  ).unsqueeze(0).to(our_chatbot.model.device)
349
 
350
+ # Set up stopping criteria
351
  stop_str = (
352
  our_chatbot.conversation.sep
353
  if our_chatbot.conversation.sep_style != SeparatorStyle.TWO
354
  else our_chatbot.conversation.sep2
355
  )
356
+ keywords = [stop_str]
 
 
 
 
 
 
 
 
357
  stopping_criteria = KeywordsStoppingCriteria(
358
  keywords, our_chatbot.tokenizer, input_ids
359
  )
360
 
361
  # Generate response
 
 
 
362
  with torch.no_grad():
363
  outputs = our_chatbot.model.generate(
364
  inputs=input_ids,
 
383
 
384
  response = our_chatbot.tokenizer.decode(outputs[0][input_ids.shape[1]:], skip_special_tokens=True)
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  print(f"[DEBUG] Conversation messages length: {len(our_chatbot.conversation.messages)}")
387
  if len(our_chatbot.conversation.messages) > 0:
388
  last_message = our_chatbot.conversation.messages[-1]
 
400
  our_chatbot.conversation.append_message(our_chatbot.conversation.roles[1], response)
401
 
402
  print(f"[DEBUG] Generated response length: {len(response)}")
 
 
 
403
  except Exception as e:
404
  print(f"[DEBUG] Response decoding error: {str(e)}")
405
  return {"error": f"Response decoding failed: {str(e)}"}
 
641
  print("Handler module loaded successfully!")
642
  print("This handler is now ready for Hugging Face endpoints.")
643
  print("Use the 'query' function as the main endpoint.")
644
+ print("Or use EndpointHandler class for Hugging Face compatibility.")