Spaces:
Sleeping
Sleeping
Auto commit at 25-2025-08 18:18:53
Browse files
lily_llm_api/services/generation_service.py
CHANGED
|
@@ -396,12 +396,19 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
| 396 |
|
| 397 |
# formatted_prompt 초기화
|
| 398 |
formatted_prompt = None
|
| 399 |
-
# 멀티모달용 RAG 스니펫(길이 제한) 준비
|
| 400 |
rag_snippet_short = ""
|
| 401 |
if context_prompt and isinstance(context_prompt, str):
|
| 402 |
try:
|
| 403 |
-
|
| 404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
if not rag_snippet_short.endswith("\n"):
|
| 406 |
rag_snippet_short += "\n"
|
| 407 |
except Exception:
|
|
@@ -412,8 +419,16 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
| 412 |
# 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
|
| 413 |
num_images = len(all_pixel_values)
|
| 414 |
image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง ๊ฐ์๋งํผ <image> ํ ํฐ ์์ฑ
|
| 415 |
-
# RAG 스니펫을 별도 컨텍스트 블록으로
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
# ์ฌ์ฉ์ ๋ฐํ๋ ์์ ์ง๋ฌธ๋ง ์ ์ง
|
| 418 |
mm_text = prompt
|
| 419 |
formatted_prompt = f"{context_block}Human: {image_tokens}{mm_text}\nAssistant:"
|
|
@@ -742,9 +757,8 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
| 742 |
gen_config['top_p'] = 1.0
|
| 743 |
gen_config['repetition_penalty'] = 1.0
|
| 744 |
gen_config['no_repeat_ngram_size'] = 0
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
else:
|
| 748 |
gen_config['max_new_tokens'] = 64
|
| 749 |
except Exception as _e_cfg:
|
| 750 |
print(f"โ ๏ธ [SPEED] ์์ฑ ์ค์ ๋ค์ด์ดํธ ์คํจ: {_e_cfg}")
|
|
|
|
| 396 |
|
| 397 |
# formatted_prompt 초기화
|
| 398 |
formatted_prompt = None
|
| 399 |
+
# 멀티모달용 RAG 스니펫(길이 제한) 준비 - 대화 이력을 제외하고 [RAG] 블록만 우선 추출
|
| 400 |
rag_snippet_short = ""
|
| 401 |
if context_prompt and isinstance(context_prompt, str):
|
| 402 |
try:
|
| 403 |
+
base_ctx = context_prompt
|
| 404 |
+
try:
|
| 405 |
+
_idx = base_ctx.find("[RAG]")
|
| 406 |
+
if _idx != -1:
|
| 407 |
+
base_ctx = base_ctx[_idx:]
|
| 408 |
+
except Exception:
|
| 409 |
+
pass
|
| 410 |
+
# 과도한 길이 방지: 160자로 축소
|
| 411 |
+
rag_snippet_short = base_ctx[:160]
|
| 412 |
if not rag_snippet_short.endswith("\n"):
|
| 413 |
rag_snippet_short += "\n"
|
| 414 |
except Exception:
|
|
|
|
| 419 |
# 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
|
| 420 |
num_images = len(all_pixel_values)
|
| 421 |
image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง ๊ฐ์๋งํผ <image> ํ ํฐ ์์ฑ
|
| 422 |
+
# 이미지 우선 지침 + RAG 스니펫을 별도 컨텍스트 블록으로 분리
|
| 423 |
+
guideline_block = (
|
| 424 |
+
"[GUIDELINES]\n"
|
| 425 |
+
"- ์ด๋ฏธ์ง๋ฅผ 1์์ ๊ทผ๊ฑฐ๋ก ์ฌ์ฉํ์ธ์.\n"
|
| 426 |
+
"- [CONTEXT]๋ ๋ณด์กฐ ํํธ์ด๋ฉฐ, ์ด๋ฏธ์ง์ ์ถฉ๋ ์ ์ด๋ฏธ์ง๋ฅผ ๋ฐ๋ฅด์ธ์.\n"
|
| 427 |
+
"- RAG/์ปจํ
์คํธ ๋ฌธ๊ตฌ๋ฅผ ๊ทธ๋๋ก ์ธ์ฉํ์ง ๋ง์ธ์.\n"
|
| 428 |
+
"- ํต์ฌ๋ง ๊ฐ๊ฒฐํ ์์ฝํ์ธ์.\n"
|
| 429 |
+
"[/GUIDELINES]\n"
|
| 430 |
+
)
|
| 431 |
+
context_block = (guideline_block + (f"[CONTEXT]\n{rag_snippet_short}[/CONTEXT]\n" if rag_snippet_short else ""))
|
| 432 |
# ์ฌ์ฉ์ ๋ฐํ๋ ์์ ์ง๋ฌธ๋ง ์ ์ง
|
| 433 |
mm_text = prompt
|
| 434 |
formatted_prompt = f"{context_block}Human: {image_tokens}{mm_text}\nAssistant:"
|
|
|
|
| 757 |
gen_config['top_p'] = 1.0
|
| 758 |
gen_config['repetition_penalty'] = 1.0
|
| 759 |
gen_config['no_repeat_ngram_size'] = 0
|
| 760 |
+
# 사용자 설정을 존중: 값이 지정된 경우 그대로 유지, 없으면 64로 제한
|
| 761 |
+
if 'max_new_tokens' not in gen_config or gen_config['max_new_tokens'] is None:
|
|
|
|
| 762 |
gen_config['max_new_tokens'] = 64
|
| 763 |
except Exception as _e_cfg:
|
| 764 |
print(f"โ ๏ธ [SPEED] ์์ฑ ์ค์ ๋ค์ด์ดํธ ์คํจ: {_e_cfg}")
|