Files changed (1)
  1. handler.py +32 -11
handler.py CHANGED
@@ -259,7 +259,7 @@ def add_message(message_text, image_input=None):
     """Add a message to the conversation"""
     return {"status": "success", "message": "Message added"}
 
-def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, max_output_tokens=4096):
+def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, max_output_tokens=4096, repetition_penalty=1.0, conv_mode_override=None):
     """Generate response for the given message and image"""
     if not LLAVA_AVAILABLE:
         return {"error": "LLaVA modules not available"}
@@ -340,6 +340,7 @@ def generate_response(message_text, image_input, temperature=0.05, top_p=1.0, ma
         temperature=temperature,
         top_p=top_p,
         max_new_tokens=max_output_tokens,
+        repetition_penalty=repetition_penalty,
         use_cache=False,
         stopping_criteria=[stopping_criteria],
     )
@@ -479,23 +480,43 @@ def query(payload):
         return {"error": "Model initialization failed"}
 
     try:
-        # Extract parameters from payload
-        message_text = payload.get("message", "")
-        image_input = payload.get("image", None)
-        temperature = payload.get("temperature", 0.05)
-        top_p = payload.get("top_p", 1.0)
-        max_output_tokens = payload.get("max_output_tokens", 4096)
+        print(f"[DEBUG] query payload keys={list(payload.keys()) if hasattr(payload, 'keys') else 'N/A'}")
 
-        if not message_text or not image_input:
-            return {"error": "Both 'message' and 'image' are required in the payload"}
+        # Extract prompt with multiple possible keys
+        message_text = (payload.get("message") or
+                        payload.get("query") or
+                        payload.get("prompt") or
+                        payload.get("istem") or "")
 
-        # Generate response
+        # Extract image with multiple possible keys
+        image_input = (payload.get("image") or
+                       payload.get("image_url") or
+                       payload.get("img") or None)
+
+        # Extract generation parameters with fallbacks
+        temperature = float(payload.get("temperature", 0.05))
+        top_p = float(payload.get("top_p", 1.0))
+        max_output_tokens = int(payload.get("max_output_tokens",
+                                            payload.get("max_new_tokens",
+                                                        payload.get("max_tokens", 4096))))
+        repetition_penalty = float(payload.get("repetition_penalty", 1.0))
+        conv_mode_override = payload.get("conv_mode", None)
+
+        if not message_text or not message_text.strip():
+            return {"error": "Missing prompt text. Use 'message', 'query', 'prompt', or 'istem' key"}
+
+        if not image_input:
+            return {"error": "Missing image. Use 'image', 'image_url', or 'img' key"}
+
+        # Generate response with all parameters
         result = generate_response(
            message_text=message_text,
            image_input=image_input,
            temperature=temperature,
            top_p=top_p,
-           max_output_tokens=max_output_tokens
+           max_output_tokens=max_output_tokens,
+           repetition_penalty=repetition_penalty,
+           conv_mode_override=conv_mode_override
        )

        return result
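
For reference, a minimal sketch of a payload that exercises the new key fallbacks and generation parameters in query(). The key names mirror the ones the handler reads above; the concrete values, image URL, and conv_mode string are illustrative assumptions, not taken from the repository.

# Illustrative payload for the updated query() handler.
# Any of the alternate keys noted in the comments resolves to the same argument.
example_payload = {
    "prompt": "Describe this image in one sentence.",  # or "message" / "query" / "istem"
    "image_url": "https://example.com/sample.jpg",     # or "image" / "img"
    "temperature": 0.2,
    "top_p": 0.9,
    "max_new_tokens": 512,                             # or "max_output_tokens" / "max_tokens"
    "repetition_penalty": 1.1,
    "conv_mode": "llava_v1",                           # optional; forwarded as conv_mode_override
}

result = query(example_payload)
print(result)

Note that the `or` chains treat empty strings and other falsy values as missing, so a blank "message" falls through to "query", "prompt", and "istem"; the explicit message_text.strip() check then rejects whitespace-only prompts before generate_response() is called.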