Spaces:
Restarting on CPU Upgrade

soyailabs / app\prompts\graph_extraction.py
wiizm's picture
Upload app\prompts\graph_extraction.py with huggingface_hub
0adcd58 verified
raw
history blame
4.1 kB
"""
Graph Extraction ํ”„๋กฌํ”„ํŠธ
์—”ํ‹ฐํ‹ฐ(์ธ๋ฌผ/์žฅ์†Œ)์™€ ๊ด€๊ณ„(์‚ฌ๊ฑด)๋ฅผ ์ถ”์ถœํ•˜๋Š” GraphRAG ๊ธฐ๋ฐ˜ ํ”„๋กฌํ”„ํŠธ
"""
from typing import Optional
def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000
) -> str:
    """Build the prompt used for graph extraction on one episode.

    The prompt asks for a JSON-only answer with an ``entities`` object
    (``characters`` and ``locations`` lists), a ``relationships`` list and
    an ``events`` list.

    Args:
        episode_content: Text of the episode to analyse. Only the first
            ``max_length`` characters are embedded in the prompt.
        episode_title: Title of the episode; inserted twice into the prompt.
        full_content: Optional full text of the work, embedded as reference
            material. When longer than 30000 characters, only the first and
            last 15000 characters are kept, joined by an omission marker.
        parent_chunk_info: Optional text inserted verbatim right after the
            first instruction line; an empty line is emitted when omitted.
        max_length: Maximum number of characters of ``episode_content`` to
            embed. Negative values are treated as 0.

    Returns:
        The complete prompt string.
    """
    # NOTE(review): the non-ASCII string literals in this function look
    # mis-encoded (mojibake) in this copy of the file - confirm the file is
    # stored and read as UTF-8. They are kept exactly as found.

    # A negative limit would make the slice drop characters from the END of
    # the episode instead of capping its length, so clamp it to zero.
    limit = max(max_length, 0)
    content_preview = episode_content[:limit]
    if len(episode_content) > limit:
        truncation_note = "\n(์ฐธ๊ณ : ํšŒ์ฐจ ๋‚ด์šฉ์ด ๊ธธ์–ด ์ผ๋ถ€๋งŒ ์‚ฌ์šฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.)"
    else:
        truncation_note = ""

    # Reference text: keep only the head and the tail of very long works.
    full_limit = 30000
    edge_length = 15000
    if not full_content:
        reference_text = "์—†์Œ"
    elif len(full_content) > full_limit:
        reference_text = (
            full_content[:edge_length]
            + "\n... (์ค‘๊ฐ„ ์ƒ๋žต) ...\n"
            + full_content[-edge_length:]
        )
    else:
        reference_text = full_content

    parent_section = parent_chunk_info if parent_chunk_info else ""

    return f"""๋‹ค์Œ ์›น์†Œ์„ค์˜ {episode_title} ํšŒ์ฐจ์—์„œ ์—”ํ‹ฐํ‹ฐ(์ธ๋ฌผ/์žฅ์†Œ)์™€ ๊ด€๊ณ„(์‚ฌ๊ฑด)๋ฅผ ์ถ”์ถœํ•ด์ฃผ์„ธ์š”.
{parent_section}
์›๋ณธ ์›น์†Œ์„ค ์ „์ฒด ๋‚ด์šฉ (์ฐธ๊ณ ์šฉ):
{reference_text}
๋ถ„์„ํ•  ํšŒ์ฐจ ๋‚ด์šฉ ({episode_title}):
{content_preview}{truncation_note}
๋‹ค์Œ ํ˜•์‹์œผ๋กœ JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”:
{{
"entities": {{
"characters": [
{{
"name": "์ธ๋ฌผ ์ด๋ฆ„",
"type": "์ธ๋ฌผ",
"description": "์ธ๋ฌผ์— ๋Œ€ํ•œ ๊ฐ„๋‹จํ•œ ์„ค๋ช…",
"role": "์ด ํšŒ์ฐจ์—์„œ์˜ ์—ญํ•  (์˜ˆ: ์ฃผ์ธ๊ณต, ์กฐ์—ฐ, ์•…์—ญ ๋“ฑ)"
}}
],
"locations": [
{{
"name": "์žฅ์†Œ ์ด๋ฆ„",
"type": "์žฅ์†Œ",
"description": "์žฅ์†Œ์— ๋Œ€ํ•œ ๊ฐ„๋‹จํ•œ ์„ค๋ช…",
"category": "์žฅ์†Œ ์œ ํ˜• (์˜ˆ: ๋„์‹œ, ๊ฑด๋ฌผ, ์ฐจ์› ๋“ฑ)"
}}
]
}},
"relationships": [
{{
"source": "๊ด€๊ณ„์˜ ์ฃผ์ฒด (์ธ๋ฌผ ์ด๋ฆ„)",
"target": "๊ด€๊ณ„์˜ ๋Œ€์ƒ (์ธ๋ฌผ ์ด๋ฆ„ ๋˜๋Š” ์žฅ์†Œ ์ด๋ฆ„)",
"type": "๊ด€๊ณ„ ์œ ํ˜• (์˜ˆ: ์นœ๊ตฌ, ์ , ์—ฐ์ธ, ๊ฑฐ์ฃผ์ง€, ๋ฐฉ๋ฌธ์ง€ ๋“ฑ)",
"description": "๊ด€๊ณ„์— ๋Œ€ํ•œ ์ƒ์„ธ ์„ค๋ช…",
"event": "์ด ๊ด€๊ณ„๋ฅผ ํ˜•์„ฑํ•˜๊ฑฐ๋‚˜ ๋ณ€ํ™”์‹œํ‚จ ์‚ฌ๊ฑด (์žˆ๋Š” ๊ฒฝ์šฐ)"
}}
],
"events": [
{{
"name": "์‚ฌ๊ฑด ์ด๋ฆ„",
"description": "์‚ฌ๊ฑด์— ๋Œ€ํ•œ ์ƒ์„ธ ์„ค๋ช…",
"participants": ["๊ด€๋ จ ์ธ๋ฌผ1", "๊ด€๋ จ ์ธ๋ฌผ2"],
"location": "์‚ฌ๊ฑด์ด ๋ฐœ์ƒํ•œ ์žฅ์†Œ",
"significance": "์‚ฌ๊ฑด์˜ ์ค‘์š”๋„ (์˜ˆ: ์ฃผ์š” ์‚ฌ๊ฑด, ๋ถ€์ˆ˜ ์‚ฌ๊ฑด ๋“ฑ)"
}}
]
}}
์ค‘์š” ์‚ฌํ•ญ:
1. ์—”ํ‹ฐํ‹ฐ๋Š” ์ด ํšŒ์ฐจ์—์„œ ์‹ค์ œ๋กœ ๋“ฑ์žฅํ•˜๊ฑฐ๋‚˜ ์–ธ๊ธ‰๋œ ์ธ๋ฌผ๊ณผ ์žฅ์†Œ๋งŒ ์ถ”์ถœํ•˜์„ธ์š”.
2. ๊ด€๊ณ„๋Š” ์ด ํšŒ์ฐจ์—์„œ ์ƒˆ๋กœ ํ˜•์„ฑ๋˜๊ฑฐ๋‚˜ ๋ณ€ํ™”ํ•œ ๊ด€๊ณ„๋ฅผ ์ค‘์‹ฌ์œผ๋กœ ์ถ”์ถœํ•˜์„ธ์š”.
3. ์‚ฌ๊ฑด์€ ์ด ํšŒ์ฐจ์—์„œ ์ผ์–ด๋‚œ ๊ตฌ์ฒด์ ์ธ ์‚ฌ๊ฑด๋“ค์„ ์ถ”์ถœํ•˜์„ธ์š”.
4. ์‘๋‹ต์€ ์˜ค์ง JSON ํ˜•์‹๋งŒ ์‚ฌ์šฉํ•˜๊ณ , ๋‹ค๋ฅธ ์„ค๋ช…์ด๋‚˜ ๋งˆํฌ๋‹ค์šด์€ ํฌํ•จํ•˜์ง€ ๋งˆ์„ธ์š”.
5. JSON ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅธ์ง€ ๋ฐ˜๋“œ์‹œ ํ™•์ธํ•˜์„ธ์š” (๋”ฐ์˜ดํ‘œ ์ด์Šค์ผ€์ดํ”„ ๋“ฑ).
6. ๋ฐฐ์—ด์ด ๋น„์–ด์žˆ์„ ๊ฒฝ์šฐ ๋นˆ ๋ฐฐ์—ด []๋กœ ํ‘œ์‹œํ•˜์„ธ์š”.
7. ํ•„๋“œ ๊ฐ’์ด ์—†๋Š” ๊ฒฝ์šฐ null ๋Œ€์‹  ๋นˆ ๋ฌธ์ž์—ด "" ๋˜๋Š” ๋นˆ ๋ฐฐ์—ด []์„ ์‚ฌ์šฉํ•˜์„ธ์š”."""