gbrabbit committed on
Commit
ce269f5
·
1 Parent(s): 6e887de

Auto commit at 25-2025-08 18:18:53

Browse files
lily_llm_api/services/generation_service.py CHANGED
@@ -396,12 +396,19 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
396
 
397
  # formatted_prompt 초기화
398
  formatted_prompt = None
399
- # 멀티모달용 RAG 스니펫(길이 제한) 준비
400
  rag_snippet_short = ""
401
  if context_prompt and isinstance(context_prompt, str):
402
  try:
403
- # 과도한 길이 방지: 우선 256자로 제한
404
- rag_snippet_short = context_prompt[:256]
 
 
 
 
 
 
 
405
  if not rag_snippet_short.endswith("\n"):
406
  rag_snippet_short += "\n"
407
  except Exception:
@@ -412,8 +419,16 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
412
  # 🔄 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
413
  num_images = len(all_pixel_values)
414
  image_tokens = "<image>" * num_images # 이미지 개수만큼 <image> 토큰 생성
415
- # RAG 스니펫은 별도 컨텍스트 블록으로 분리하여 에코 방지
416
- context_block = f"[CONTEXT]\n{rag_snippet_short}[/CONTEXT]\n" if rag_snippet_short else ""
 
 
 
 
 
 
 
 
417
  # 사용자 발화는 순수 질문만 유지
418
  mm_text = prompt
419
  formatted_prompt = f"{context_block}Human: {image_tokens}{mm_text}\nAssistant:"
@@ -742,9 +757,8 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
742
  gen_config['top_p'] = 1.0
743
  gen_config['repetition_penalty'] = 1.0
744
  gen_config['no_repeat_ngram_size'] = 0
745
- if 'max_new_tokens' in gen_config and gen_config['max_new_tokens'] is not None:
746
- gen_config['max_new_tokens'] = min(int(gen_config['max_new_tokens']), 64)
747
- else:
748
  gen_config['max_new_tokens'] = 64
749
  except Exception as _e_cfg:
750
  print(f"⚠️ [SPEED] 생성 설정 다이어트 실패: {_e_cfg}")
 
396
 
397
  # formatted_prompt 초기화
398
  formatted_prompt = None
399
+ # 멀티모달용 RAG 스니펫(길이 제한) 준비 - 대화 이력은 제외하고 [RAG] 블록만 우선 추출
400
  rag_snippet_short = ""
401
  if context_prompt and isinstance(context_prompt, str):
402
  try:
403
+ base_ctx = context_prompt
404
+ try:
405
+ _idx = base_ctx.find("[RAG]")
406
+ if _idx != -1:
407
+ base_ctx = base_ctx[_idx:]
408
+ except Exception:
409
+ pass
410
+ # 과도한 길이 방지: 160자로 축소
411
+ rag_snippet_short = base_ctx[:160]
412
  if not rag_snippet_short.endswith("\n"):
413
  rag_snippet_short += "\n"
414
  except Exception:
 
419
  # 🔄 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
420
  num_images = len(all_pixel_values)
421
  image_tokens = "<image>" * num_images # 이미지 개수만큼 <image> 토큰 생성
422
+ # 이미지 우선 지침 + RAG 스니펫은 별도 컨텍스트 블록으로 분리
423
+ guideline_block = (
424
+ "[GUIDELINES]\n"
425
+ "- 이미지를 1순위 근거로 사용하세요.\n"
426
+ "- [CONTEXT]는 보조 힌트이며, 이미지와 충돌 시 이미지를 따르세요.\n"
427
+ "- RAG/컨텍스트 문구를 그대로 인용하지 마세요.\n"
428
+ "- 핵심만 간결히 요약하세요.\n"
429
+ "[/GUIDELINES]\n"
430
+ )
431
+ context_block = (guideline_block + (f"[CONTEXT]\n{rag_snippet_short}[/CONTEXT]\n" if rag_snippet_short else ""))
432
  # 사용자 발화는 순수 질문만 유지
433
  mm_text = prompt
434
  formatted_prompt = f"{context_block}Human: {image_tokens}{mm_text}\nAssistant:"
 
757
  gen_config['top_p'] = 1.0
758
  gen_config['repetition_penalty'] = 1.0
759
  gen_config['no_repeat_ngram_size'] = 0
760
+ # 사용자 설정을 존중: 값이 지정된 경우 그대로 유지, 없으면 64로 제한
761
+ if 'max_new_tokens' not in gen_config or gen_config['max_new_tokens'] is None:
 
762
  gen_config['max_new_tokens'] = 64
763
  except Exception as _e_cfg:
764
  print(f"⚠️ [SPEED] 생성 설정 다이어트 실패: {_e_cfg}")