gbrabbit committed on
Commit
eab049e
·
1 Parent(s): 987b330

Auto commit at 25-2025-08 14:37:20

Browse files
lily_llm_api/services/generation_service.py CHANGED
@@ -283,8 +283,8 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
283
  per_image_tokens = [3000] * len(all_pixel_values)
284
 
285
  # 2) 텍스트 길이 측정 (이미지 토큰 제외한 프롬프트)
286
- # 멀티모달에서도 RAG 스니펫(축약)을 포함하여 텍스트 길이를 산정
287
- base_text_prompt = f"Human: {rag_snippet_short}{prompt}\nAssistant:"
288
  text_inputs = tokenizer(
289
  base_text_prompt,
290
  return_tensors="pt",
@@ -411,10 +411,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
411
  # 🔄 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
412
  num_images = len(all_pixel_values)
413
  image_tokens = "<image>" * num_images # 이미지 개수만큼 <image> 토큰 생성
414
- # 답변 유도를 위해 Assistant 프리픽스 추가
415
- # 멀티모달에서도 RAG 텍스트 스니펫(축약)을 앞에 포함하여 텍스트 근거를 반영
416
- mm_text = f"{rag_snippet_short}{prompt}" if rag_snippet_short else prompt
417
- formatted_prompt = f"Human: {image_tokens}{mm_text}\nAssistant:"
 
418
  print(f"🔍 [DEBUG] 멀티모달 프롬프트 구성 (공식 형식): {formatted_prompt}")
419
  print(f"🔍 [DEBUG] 이미지 토큰 생성: {num_images}개 이미지 -> {image_tokens}")
420
  image_processed = True
@@ -603,13 +604,17 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
603
  except Exception:
604
  neg_exists = False
605
  if not neg_exists and len(all_pixel_values) > 0:
606
- print("⚠️ [DEBUG] -1 토큰 없음 → RAG 스니펫 길이 줄여 재시도")
607
  for limit in [128, 64, 0]:
608
  try:
609
  base_snippet = (context_prompt or "")[:limit]
610
- if base_snippet and not base_snippet.endswith("\n"):
611
- base_snippet += "\n"
612
- base_prompt_retry = f"Human: {'<image>' * len(all_pixel_values)}{base_snippet}{prompt}\nAssistant:"
 
 
 
 
613
  print(f"🔍 [DEBUG] 재시도 limit={limit}: {base_prompt_retry}")
614
  inputs_retry = tokenizer.encode_prompt(
615
  prompt=base_prompt_retry,
 
283
  per_image_tokens = [3000] * len(all_pixel_values)
284
 
285
  # 2) 텍스트 길이 측정 (이미지 토큰 제외한 프롬프트)
286
+ # 멀티모달에서도 RAG 스니펫은 컨텍스트 블록으로 분리하여 텍스트 길이를 산정
287
+ base_text_prompt = f"{('[CONTEXT]\n'+rag_snippet_short+'[/CONTEXT]\n') if rag_snippet_short else ''}Human: {prompt}\nAssistant:"
288
  text_inputs = tokenizer(
289
  base_text_prompt,
290
  return_tensors="pt",
 
411
  # 🔄 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
412
  num_images = len(all_pixel_values)
413
  image_tokens = "<image>" * num_images # 이미지 개수만큼 <image> 토큰 생성
414
+ # RAG 스니펫은 별도 컨텍스트 블록으로 분리하여 에코 방지
415
+ context_block = f"[CONTEXT]\n{rag_snippet_short}[/CONTEXT]\n" if rag_snippet_short else ""
416
+ # 사용자 발화는 순수 질문만 유지
417
+ mm_text = prompt
418
+ formatted_prompt = f"{context_block}Human: {image_tokens}{mm_text}\nAssistant:"
419
  print(f"🔍 [DEBUG] 멀티모달 프롬프트 구성 (공식 형식): {formatted_prompt}")
420
  print(f"🔍 [DEBUG] 이미지 토큰 생성: {num_images}개 이미지 -> {image_tokens}")
421
  image_processed = True
 
604
  except Exception:
605
  neg_exists = False
606
  if not neg_exists and len(all_pixel_values) > 0:
607
+ print("⚠️ [DEBUG] -1 토큰 없음 → 컨텍스트 블록 길이 줄여 재시도")
608
  for limit in [128, 64, 0]:
609
  try:
610
  base_snippet = (context_prompt or "")[:limit]
611
+ if base_snippet:
612
+ if not base_snippet.endswith("\n"):
613
+ base_snippet += "\n"
614
+ context_block = f"[CONTEXT]\n{base_snippet}[/CONTEXT]\n"
615
+ else:
616
+ context_block = ""
617
+ base_prompt_retry = f"{context_block}Human: {'<image>' * len(all_pixel_values)}{prompt}\nAssistant:"
618
  print(f"🔍 [DEBUG] 재시도 limit={limit}: {base_prompt_retry}")
619
  inputs_retry = tokenizer.encode_prompt(
620
  prompt=base_prompt_retry,