|
|
"""
|
|
|
Graph Extraction ํ๋กฌํํธ
|
|
|
์ํฐํฐ(์ธ๋ฌผ/์ฅ์)์ ๊ด๊ณ(์ฌ๊ฑด)๋ฅผ ์ถ์ถํ๋ GraphRAG ๊ธฐ๋ฐ ํ๋กฌํํธ
|
|
|
"""
|
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
# Reference text (the whole novel) longer than this many characters is abridged.
_FULL_CONTENT_LIMIT = 30000
# Number of characters kept from each end of an abridged reference text.
_FULL_CONTENT_EDGE = 15000

# JSON skeleton the model is asked to fill in. Kept as a plain (non-f) string
# so the braces need no escaping.
_RESPONSE_SCHEMA = """{
  "entities": {
    "characters": [
      {
        "name": "인물 이름",
        "type": "인물",
        "description": "인물에 대한 간단한 설명",
        "role": "이 회차에서의 역할 (예: 주인공, 조연, 악역 등)"
      }
    ],
    "locations": [
      {
        "name": "장소 이름",
        "type": "장소",
        "description": "장소에 대한 간단한 설명",
        "category": "장소 유형 (예: 도시, 건물, 차원 등)"
      }
    ]
  },
  "relationships": [
    {
      "source": "관계의 주체 (인물 이름)",
      "target": "관계의 대상 (인물 이름 또는 장소 이름)",
      "type": "관계 유형 (예: 친구, 적, 연인, 거주지, 방문지 등)",
      "description": "관계에 대한 상세 설명",
      "event": "이 관계를 형성하거나 변화시킨 사건 (있는 경우)"
    }
  ],
  "events": [
    {
      "name": "사건 이름",
      "description": "사건에 대한 상세 설명",
      "participants": ["관련 인물1", "관련 인물2"],
      "location": "사건이 발생한 장소",
      "significance": "사건의 중요도 (예: 주요 사건, 부수 사건 등)"
    }
  ]
}"""

# Closing instructions appended after the JSON skeleton.
_GUIDELINES = """중요 사항:
1. 엔티티는 이 회차에서 실제로 등장하거나 언급된 인물과 장소만 추출하세요.
2. 관계는 이 회차에서 새로 형성되거나 변화한 관계를 중심으로 추출하세요.
3. 사건은 이 회차에서 일어난 구체적인 사건들을 추출하세요.
4. 응답은 오직 JSON 형식만 사용하고, 다른 설명이나 마크다운을 포함하지 마세요.
5. JSON 형식이 올바른지 반드시 확인하세요 (따옴표 이스케이프 등).
6. 배열이 비어있을 경우 빈 배열 []로 표시하세요.
7. 필드 값이 없는 경우 null 대신 빈 문자열 "" 또는 빈 배열 []을 사용하세요."""


def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000
) -> str:
    """Build the prompt used for graph extraction from one episode.

    The prompt asks for entities (characters/locations), relationships and
    events, and instructs the model to answer with JSON only.

    Args:
        episode_content: Text of the episode to analyse. Only the first
            ``max_length`` characters are embedded; when the text is longer,
            a note saying it was shortened is appended.
        episode_title: Episode title; it is interpolated twice into the
            prompt.
        full_content: Full text of the original work, used as reference.
            Texts longer than 30000 characters are reduced to their first and
            last 15000 characters joined by an "omitted" marker. When missing
            or empty, a "none" placeholder is embedded instead.
        parent_chunk_info: Optional parent-chunk information placed near the
            top of the prompt; omitted when missing or empty.
        max_length: Maximum number of characters of ``episode_content`` to
            embed. Negative values are treated as 0.

    Returns:
        The complete prompt string.
    """
    # A negative bound would make the slice count from the end of the text
    # and silently drop trailing characters, so clamp it to zero.
    limit = max(max_length, 0)
    content_preview = episode_content[:limit]
    if len(episode_content) > limit:
        truncation_note = "\n(참고: 회차 내용이 길어 일부만 사용되었습니다.)"
    else:
        truncation_note = ""

    if not full_content:
        reference_text = "없음"
    elif len(full_content) > _FULL_CONTENT_LIMIT:
        # Keep the opening and the ending of the work; drop the middle.
        reference_text = (
            full_content[:_FULL_CONTENT_EDGE]
            + "\n... (중간 생략) ...\n"
            + full_content[-_FULL_CONTENT_EDGE:]
        )
    else:
        reference_text = full_content

    parent_section = parent_chunk_info or ""

    sections = [
        f"다음 웹소설의 {episode_title} 회차에서 엔티티(인물/장소)와 관계(사건)를 추출해주세요.",
        parent_section,
        f"원본 웹소설 전체 내용 (참고용):\n{reference_text}",
        f"분석할 회차 내용 ({episode_title}):\n{content_preview}{truncation_note}",
        "다음 형식으로 JSON 형식으로만 응답하세요:",
        _RESPONSE_SCHEMA,
        _GUIDELINES,
    ]
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)
|
|
|
|
|
|
|