|
|
""" |
|
|
Graph Extraction ํ๋กฌํํธ |
|
|
์ํฐํฐ(์ธ๋ฌผ/์ฅ์)์ ๊ด๊ณ(์ฌ๊ฑด)๋ฅผ ์ถ์ถํ๋ GraphRAG ๊ธฐ๋ฐ ํ๋กฌํํธ |
|
|
""" |
|
|
|
|
|
from typing import Optional |
|
|
|
|
|
|
|
|
def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000
) -> str:
    """Build the LLM prompt used for graph extraction from one episode.

    The prompt (written in Korean) asks the model to extract entities
    (characters/locations), relationships and events from the episode and
    to answer with JSON only, following the schema embedded in the prompt.

    Args:
        episode_content: Text of the episode to analyse. Only the first
            ``max_length`` characters are embedded; a note is appended to
            the excerpt when the text had to be cut.
        episode_title: Episode title (e.g. '1화', '2화').
        full_content: Full text of the source web novel, for reference.
            When longer than 30,000 characters only the first and last
            15,000 characters are kept, joined by an elision marker. When
            omitted or empty, "없음" is shown in its place.
        parent_chunk_info: Parent chunk information (optional); inserted
            verbatim near the top of the prompt.
        max_length: Maximum number of episode characters to embed.

    Returns:
        The prompt string.
    """
    # Budget for the reference text: anything longer than the limit is
    # reduced to its head and tail so the prompt keeps both ends of the story.
    full_content_limit = 30000
    full_content_edge = 15000

    content_preview = episode_content[:max_length]
    truncation_note = (
        "\n(참고: 회차 내용이 길어 일부만 사용되었습니다.)"
        if len(episode_content) > max_length
        else ""
    )

    full_content_preview = ""
    if full_content:
        if len(full_content) > full_content_limit:
            full_content_preview = (
                full_content[:full_content_edge]
                + "\n... (중간 생략) ...\n"
                + full_content[-full_content_edge:]
            )
        else:
            full_content_preview = full_content

    # The preview is at most 2 * 15,000 characters plus the marker, so no
    # additional cap is required before embedding it.
    reference_text = full_content_preview or "없음"
    parent_section = parent_chunk_info or ""

    # NOTE(review): the Korean literals were restored from mis-decoded
    # (UTF-8 read as TIS-620) text; confirm the wording against the
    # authoritative copy of this file.
    # Literal braces of the JSON template are doubled because this is an
    # f-string.
    prompt = f"""다음 웹소설의 {episode_title} 회차에서 엔티티(인물/장소)와 관계(사건)를 추출해주세요.

{parent_section}

원본 웹소설 전체 내용 (참고용):
{reference_text}

분석할 회차 내용 ({episode_title}):
{content_preview}{truncation_note}

다음 형식으로 JSON 형식으로만 응답하세요:

{{
  "entities": {{
    "characters": [
      {{
        "name": "인물 이름",
        "type": "인물",
        "description": "인물에 대한 간단한 설명",
        "role": "이 회차에서의 역할 (예: 주인공, 조연, 악역 등)"
      }}
    ],
    "locations": [
      {{
        "name": "장소 이름",
        "type": "장소",
        "description": "장소에 대한 간단한 설명",
        "category": "장소 유형 (예: 도시, 건물, 차원 등)"
      }}
    ]
  }},
  "relationships": [
    {{
      "source": "관계의 주체 (인물 이름)",
      "target": "관계의 대상 (인물 이름 또는 장소 이름)",
      "type": "관계 유형 (예: 친구, 적, 연인, 거주지, 방문지 등)",
      "description": "관계에 대한 상세 설명",
      "event": "이 관계를 형성하거나 변화시킨 사건 (있는 경우)"
    }}
  ],
  "events": [
    {{
      "name": "사건 이름",
      "description": "사건에 대한 상세 설명",
      "participants": ["관련 인물1", "관련 인물2"],
      "location": "사건이 발생한 장소",
      "significance": "사건의 중요도 (예: 주요 사건, 부수 사건 등)"
    }}
  ]
}}

중요 사항:
1. 엔티티는 이 회차에서 실제로 등장하거나 언급된 인물과 장소만 추출하세요.
2. 관계는 이 회차에서 새로 형성되거나 변화한 관계를 중심으로 추출하세요.
3. 사건은 이 회차에서 일어난 구체적인 사건들을 추출하세요.
4. 응답은 오직 JSON 형식만 사용하고, 다른 설명이나 마크다운은 포함하지 마세요.
5. JSON 형식이 올바른지 반드시 확인하세요 (따옴표 이스케이프 등).
6. 배열이 비어있을 경우 빈 배열 []로 표시하세요.
7. 필드 값이 없는 경우 null 대신 빈 문자열 "" 또는 빈 배열 []을 사용하세요."""

    return prompt
|
|
|
|
|
|