File size: 4,096 Bytes
0adcd58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""

Graph Extraction ํ”„๋กฌํ”„ํŠธ

์—”ํ‹ฐํ‹ฐ(์ธ๋ฌผ/์žฅ์†Œ)์™€ ๊ด€๊ณ„(์‚ฌ๊ฑด)๋ฅผ ์ถ”์ถœํ•˜๋Š” GraphRAG ๊ธฐ๋ฐ˜ ํ”„๋กฌํ”„ํŠธ

"""

from typing import Optional


def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000,
) -> str:
    """Build the graph-extraction prompt for a single episode.

    The returned template embeds a JSON schema with ``entities``
    (``characters`` and ``locations``), ``relationships`` and ``events``
    sections, preceded by the optional parent-chunk text, the optional
    reference text, and the (possibly truncated) episode text.

    Args:
        episode_content: Text of the episode to analyse. Only the first
            ``max_length`` characters are embedded in the prompt; a note is
            appended after them when the text was longer than that.
        episode_title: Episode label. It is interpolated twice: in the
            opening instruction line and in the episode-content header.
        full_content: Optional full text used as reference material. When
            it is longer than 30000 characters, only its first 15000 and
            last 15000 characters are kept, joined by an elision marker.
            When missing or empty, a fixed placeholder string is used.
        parent_chunk_info: Optional text inserted verbatim near the top of
            the prompt. A falsy value is rendered as an empty string.
        max_length: Maximum number of characters of ``episode_content``
            to embed.

    Returns:
        The assembled prompt string.
    """
    # NOTE(review): the non-ASCII text in the literals below looks
    # mis-encoded (mojibake) - confirm the file is stored and read as UTF-8.
    # The literals are intentionally left unchanged here.

    # Episode text: keep the leading slice and flag the cut, if any.
    if len(episode_content) > max_length:
        truncation_note = "\n(์ฐธ๊ณ : ํšŒ์ฐจ ๋‚ด์šฉ์ด ๊ธธ์–ด ์ผ๋ถ€๋งŒ ์‚ฌ์šฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.)"
    else:
        truncation_note = ""
    content_preview = episode_content[:max_length]

    # Reference text: nothing, the whole thing, or head + marker + tail.
    if not full_content:
        full_content_preview = ""
    elif len(full_content) > 30000:
        head = full_content[:15000]
        tail = full_content[-15000:]
        full_content_preview = head + "\n... (์ค‘๊ฐ„ ์ƒ๋žต) ...\n" + tail
    else:
        full_content_preview = full_content

    # Values interpolated into the template, resolved up front.
    if full_content_preview:
        reference_text = full_content_preview[:50000]
    else:
        reference_text = "์—†์Œ"
    parent_block = parent_chunk_info or ""

    return f"""๋‹ค์Œ ์›น์†Œ์„ค์˜ {episode_title} ํšŒ์ฐจ์—์„œ ์—”ํ‹ฐํ‹ฐ(์ธ๋ฌผ/์žฅ์†Œ)์™€ ๊ด€๊ณ„(์‚ฌ๊ฑด)๋ฅผ ์ถ”์ถœํ•ด์ฃผ์„ธ์š”.



{parent_block}



์›๋ณธ ์›น์†Œ์„ค ์ „์ฒด ๋‚ด์šฉ (์ฐธ๊ณ ์šฉ):

{reference_text}



๋ถ„์„ํ•  ํšŒ์ฐจ ๋‚ด์šฉ ({episode_title}):

{content_preview}{truncation_note}



๋‹ค์Œ ํ˜•์‹์œผ๋กœ JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ตํ•˜์„ธ์š”:



{{

    "entities": {{

        "characters": [

            {{

                "name": "์ธ๋ฌผ ์ด๋ฆ„",

                "type": "์ธ๋ฌผ",

                "description": "์ธ๋ฌผ์— ๋Œ€ํ•œ ๊ฐ„๋‹จํ•œ ์„ค๋ช…",

                "role": "์ด ํšŒ์ฐจ์—์„œ์˜ ์—ญํ•  (์˜ˆ: ์ฃผ์ธ๊ณต, ์กฐ์—ฐ, ์•…์—ญ ๋“ฑ)"

            }}

        ],

        "locations": [

            {{

                "name": "์žฅ์†Œ ์ด๋ฆ„",

                "type": "์žฅ์†Œ",

                "description": "์žฅ์†Œ์— ๋Œ€ํ•œ ๊ฐ„๋‹จํ•œ ์„ค๋ช…",

                "category": "์žฅ์†Œ ์œ ํ˜• (์˜ˆ: ๋„์‹œ, ๊ฑด๋ฌผ, ์ฐจ์› ๋“ฑ)"

            }}

        ]

    }},

    "relationships": [

        {{

            "source": "๊ด€๊ณ„์˜ ์ฃผ์ฒด (์ธ๋ฌผ ์ด๋ฆ„)",

            "target": "๊ด€๊ณ„์˜ ๋Œ€์ƒ (์ธ๋ฌผ ์ด๋ฆ„ ๋˜๋Š” ์žฅ์†Œ ์ด๋ฆ„)",

            "type": "๊ด€๊ณ„ ์œ ํ˜• (์˜ˆ: ์นœ๊ตฌ, ์ , ์—ฐ์ธ, ๊ฑฐ์ฃผ์ง€, ๋ฐฉ๋ฌธ์ง€ ๋“ฑ)",

            "description": "๊ด€๊ณ„์— ๋Œ€ํ•œ ์ƒ์„ธ ์„ค๋ช…",

            "event": "์ด ๊ด€๊ณ„๋ฅผ ํ˜•์„ฑํ•˜๊ฑฐ๋‚˜ ๋ณ€ํ™”์‹œํ‚จ ์‚ฌ๊ฑด (์žˆ๋Š” ๊ฒฝ์šฐ)"

        }}

    ],

    "events": [

        {{

            "name": "์‚ฌ๊ฑด ์ด๋ฆ„",

            "description": "์‚ฌ๊ฑด์— ๋Œ€ํ•œ ์ƒ์„ธ ์„ค๋ช…",

            "participants": ["๊ด€๋ จ ์ธ๋ฌผ1", "๊ด€๋ จ ์ธ๋ฌผ2"],

            "location": "์‚ฌ๊ฑด์ด ๋ฐœ์ƒํ•œ ์žฅ์†Œ",

            "significance": "์‚ฌ๊ฑด์˜ ์ค‘์š”๋„ (์˜ˆ: ์ฃผ์š” ์‚ฌ๊ฑด, ๋ถ€์ˆ˜ ์‚ฌ๊ฑด ๋“ฑ)"

        }}

    ]

}}



์ค‘์š” ์‚ฌํ•ญ:

1. ์—”ํ‹ฐํ‹ฐ๋Š” ์ด ํšŒ์ฐจ์—์„œ ์‹ค์ œ๋กœ ๋“ฑ์žฅํ•˜๊ฑฐ๋‚˜ ์–ธ๊ธ‰๋œ ์ธ๋ฌผ๊ณผ ์žฅ์†Œ๋งŒ ์ถ”์ถœํ•˜์„ธ์š”.

2. ๊ด€๊ณ„๋Š” ์ด ํšŒ์ฐจ์—์„œ ์ƒˆ๋กœ ํ˜•์„ฑ๋˜๊ฑฐ๋‚˜ ๋ณ€ํ™”ํ•œ ๊ด€๊ณ„๋ฅผ ์ค‘์‹ฌ์œผ๋กœ ์ถ”์ถœํ•˜์„ธ์š”.

3. ์‚ฌ๊ฑด์€ ์ด ํšŒ์ฐจ์—์„œ ์ผ์–ด๋‚œ ๊ตฌ์ฒด์ ์ธ ์‚ฌ๊ฑด๋“ค์„ ์ถ”์ถœํ•˜์„ธ์š”.

4. ์‘๋‹ต์€ ์˜ค์ง JSON ํ˜•์‹๋งŒ ์‚ฌ์šฉํ•˜๊ณ , ๋‹ค๋ฅธ ์„ค๋ช…์ด๋‚˜ ๋งˆํฌ๋‹ค์šด์€ ํฌํ•จํ•˜์ง€ ๋งˆ์„ธ์š”.

5. JSON ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅธ์ง€ ๋ฐ˜๋“œ์‹œ ํ™•์ธํ•˜์„ธ์š” (๋”ฐ์˜ดํ‘œ ์ด์Šค์ผ€์ดํ”„ ๋“ฑ).

6. ๋ฐฐ์—ด์ด ๋น„์–ด์žˆ์„ ๊ฒฝ์šฐ ๋นˆ ๋ฐฐ์—ด []๋กœ ํ‘œ์‹œํ•˜์„ธ์š”.

7. ํ•„๋“œ ๊ฐ’์ด ์—†๋Š” ๊ฒฝ์šฐ null ๋Œ€์‹  ๋นˆ ๋ฌธ์ž์—ด "" ๋˜๋Š” ๋นˆ ๋ฐฐ์—ด []์„ ์‚ฌ์šฉํ•˜์„ธ์š”."""