File size: 17,976 Bytes
47e7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebf9191
47e7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebf9191
47e7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebf9191
47e7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebf9191
47e7138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# -*- coding: utf-8 -*-
"""
inject_fintech_gold_data.py โ€” ํ•€ํ…Œํฌ/๊ธˆ์œต AI ๊ณจ๋“œ ๋ฐ์ดํ„ฐ ์ฃผ์ž… ์Šคํฌ๋ฆฝํŠธ
================================================================
์ž‘์„ฑ์ผ: 2026-05-20
์ €์ž‘๊ถŒ: (c) 2026 FinGraph Team All Rights Reserved.

๋ณธ ์Šคํฌ๋ฆฝํŠธ๋Š” ์ฑ—๋ด‡์˜ ์ฃผ์ œ๋ฅผ 100% ๊ธˆ์œต/ํ•€ํ…Œํฌ AI ์ „๋ฌธ ๋„๋ฉ”์ธ์œผ๋กœ ์—„๊ฒฉ ๊ฐœํŽธํ•˜๊ธฐ ์œ„ํ•ด,
์‹ค์ œ ๋™์ž‘์„ ๋ณด์žฅํ•˜๋Š” 4๋Œ€ ์‹œ๋‚˜๋ฆฌ์˜ค ๋งž์ถคํ˜• ๊ธˆ์œต ๋‰ด์Šค ๊ธฐ์‚ฌ, ์—”ํ‹ฐํ‹ฐ, ์ฒญํ‚น ๋ฐ์ดํ„ฐ ๋ฐ 
1536์ฐจ์› ๋ฒกํ„ฐ ์ž„๋ฒ ๋”ฉ์„ Neo4j AuraDB์— ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ƒ์„ฑํ•˜์—ฌ ์™„๋ฒฝํ•˜๊ฒŒ ์ ์žฌํ•ฉ๋‹ˆ๋‹ค.
"""

import os
import sys

import dotenv
import neo4j
from openai import OpenAI

# Load variables from a local .env file into the process environment so the
# os.getenv() lookups further down can see them.
dotenv.load_dotenv()

# Re-configure stdout to UTF-8 (the original note targets the Windows console).
# Streams that do not offer reconfigure() are left untouched.
if getattr(sys.stdout, "reconfigure", None) is not None:
    sys.stdout.reconfigure(encoding="utf-8")


def _open_verified_driver(uri: str, auth: tuple[str, str]) -> neo4j.Driver:
    """Create a driver for ``uri``/``auth`` and verify it; close it again on failure."""
    drv = neo4j.GraphDatabase.driver(uri, auth=auth)
    try:
        drv.verify_connectivity()
    except Exception:
        # Nobody will use this driver, so release its resources before propagating.
        drv.close()
        raise
    return drv


def get_neo4j_driver() -> neo4j.Driver:
    """Build a connectivity-verified Neo4j driver, preferring Client ID/Secret auth.

    Environment variables read:
        NEO4J_URI            -- defaults to ``neo4j://localhost:7687``.
        NEO4J_CLIENT_ID / NEO4J_CLIENT_SECRET
                             -- tried first when both are set.
        NEO4J_USERNAME / NEO4J_PASSWORD
                             -- fallback credentials (defaults ``neo4j`` / ``password``).

    Returns:
        A driver whose ``verify_connectivity()`` call has succeeded. The caller
        owns it and is responsible for closing it.

    Raises:
        Whatever the neo4j driver raises when the username/password attempt fails.
        A failure of the Client ID/Secret attempt is reported on stderr and then
        falls through to the username/password attempt instead of raising.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        try:
            return _open_verified_driver(uri, (client_id, client_secret))
        except Exception as exc:
            # Deliberate best-effort: keep the fallback, but say why it was needed
            # instead of swallowing the error silently.
            print(
                "[WARN] Neo4j Client ID/Secret authentication failed "
                f"({type(exc).__name__}: {exc}); falling back to username/password.",
                file=sys.stderr,
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    return _open_verified_driver(uri, (username, password))


# Initialise the module-wide OpenAI API client; without an API key the script
# prints a failure message and exits with status 1.
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    client = OpenAI(api_key=api_key)
else:
    print("[FAIL] OPENAI_API_KEY ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    sys.exit(1)


def get_embedding(text: str) -> list[float]:
    """Return the ``text-embedding-3-small`` embedding vector for ``text``.

    Newlines are flattened to spaces before the request is sent. The original
    note describes the resulting vector as 1536-dimensional.
    """
    single_line = " ".join(text.split("\n"))
    result = client.embeddings.create(
        model="text-embedding-3-small",
        input=[single_line],
    )
    first_item = result.data[0]
    return first_item.embedding


# Specification of the four fintech/finance AI gold datasets
GOLD_ARTICLES = [
    {
        "article_id": "ART_GOLD_001",
        "title": "์‹ ํ•œ์€ํ–‰, ์ƒ์„ฑํ˜• AI ํƒ‘์žฌ ์ฐจ์„ธ๋Œ€ ๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ € '์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค' ์ „๊ฒฉ ์ถœ์‹œ",
        "url": "https://magazine.hankyung.com/business/article/202604165507b",
        "source": "์—ฐํ•ฉ๋‰ด์Šค",
        "author": "๊น€๊ธˆ์œต ๊ธฐ์ž",
        "published_date": "2026-05-20 09:00",
        "content": (
            "์‹ ํ•œ์€ํ–‰์ด ์ƒ์„ฑํ˜• AI ๊ธฐ์ˆ ์„ ๊ฒฐํ•ฉํ•˜์—ฌ ๊ฐœ์ธ ๋งž์ถคํ˜• ์ž์‚ฐ๊ด€๋ฆฌ ์„œ๋น„์Šค๋ฅผ ๋Œ€ํญ ๊ฐ•ํ™”ํ•œ "
            "์ฐจ์„ธ๋Œ€ ๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ € ์†”๋ฃจ์…˜ '์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค'๋ฅผ ๊ณต์‹ ์ถœ์‹œํ–ˆ๋‹ค.\n"
            "์ด๋ฒˆ ์„œ๋น„์Šค๋Š” ์‹ค์‹œ๊ฐ„ ๊ธˆ์œต ์‹œ์žฅ ๋น…๋ฐ์ดํ„ฐ์™€ ๊ณ ๊ฐ์˜ ํˆฌ์ž ์„ฑํ–ฅ์„ ๋‹ค์ฐจ์› ๋ถ„์„ํ•˜๋Š” "
            "AI ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•˜๋ฉฐ, ์ž์‚ฐ ๋ฐฐ๋ถ„ ๋น„์ค‘์„ ๋™์ ์œผ๋กœ ์žฌ์กฐ์ •(๋ฆฌ๋ฐธ๋Ÿฐ์‹ฑ)ํ•ด ์ค€๋‹ค.\n"
            "ํŠนํžˆ ์ดˆ๊ฑฐ๋Œ€ ์–ธ์–ด๋ชจ๋ธ(LLM)์ด ์ ์šฉ๋˜์–ด ๋”ฑ๋”ฑํ•˜๊ณ  ์–ด๋ ค์šด ํˆฌ์ž ๋ณด๊ณ ์„œ๋ฅผ ์ž์—ฐ์–ด ํ˜•ํƒœ์˜ "
            "์นœ์ ˆํ•œ ์ž์‚ฐ ์ข…ํ•ฉ ๋ธŒ๋ฆฌํ•‘ ๋ณด๊ณ ์„œ๋กœ ์ž๋™ ์š”์•ฝํ•˜์—ฌ ์ „๋‹ฌํ•˜๋Š” ํ˜์‹ ์„ ์ด๋ค„๋ƒˆ๋‹ค.\n"
            "๊ธˆ์œต ์†Œ๋น„์ž๋“ค์€ ์‹ ํ•œ ์ (SOL) ๋ฑ…ํ‚น ์•ฑ์„ ํ†ตํ•ด ๊ฐ„ํŽธํ•˜๊ฒŒ ํฌํŠธํด๋ฆฌ์˜ค ์ œ์•ˆ์„ ๋ฐ›๊ณ  "
            "๋””์ง€ํ„ธ ์ž์‚ฐ ๊ด€๋ฆฌ๋ฅผ ๊ฒฝํ—˜ํ•  ์ˆ˜ ์žˆ๋‹ค."
        ),
        "entities": [
            {"name": "์‹ ํ•œ์€ํ–‰", "type": "AICompany", "description": "์ƒ์„ฑํ˜• AI ์ž์‚ฐ๊ด€๋ฆฌ ๋ฐ ๊ธˆ์œต ํ…Œํฌ๋ฅผ ์„ ๋„ํ•˜๋Š” ์‹œ์ค‘์€ํ–‰"},
            {"name": "๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €", "type": "AITechnology", "description": "์•Œ๊ณ ๋ฆฌ์ฆ˜ ๊ธฐ๋ฐ˜ ๊ฐœ์ธ ๋งž์ถคํ˜• ํˆฌ์ž ํฌํŠธํด๋ฆฌ์˜ค ๊ตฌ์„ฑ ๊ธฐ์ˆ "},
            {"name": "์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค", "type": "AIService", "description": "์ƒ์„ฑํ˜• AI ๊ฒฐํ•ฉ ์ฐจ์„ธ๋Œ€ ๋ชจ๋ฐ”์ผ ์ž์‚ฐ๊ด€๋ฆฌ ์†”๋ฃจ์…˜"},
            {"name": "์ž์‚ฐ๊ด€๋ฆฌ", "type": "AIField", "description": "๋””์ง€ํ„ธ ๊ธฐ์ˆ ๊ณผ ๋งˆ์ด๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜์˜ ๋งž์ถคํ˜• ๊ฐœ์ธ ๊ธˆ์œต ์„œ๋น„์Šค"}
        ],
        "relationships": [
            ("์‹ ํ•œ์€ํ–‰", "DEVELOPS", "๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €"),
            ("์‹ ํ•œ์€ํ–‰", "DEVELOPS", "์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค"),
            ("๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €", "APPLIES", "์ž์‚ฐ๊ด€๋ฆฌ"),
            ("์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค", "USED_IN", "์ž์‚ฐ๊ด€๋ฆฌ"),
            ("์‹ ํ•œ์€ํ–‰", "PARTNERS_WITH", "์นด์นด์˜คํŽ˜์ด")  # ํฌ๋กœ์Šค ๋„๋ฉ”์ธ ์—ฐ๊ณ„
        ]
    },
    {
        "article_id": "ART_GOLD_002",
        "title": "์นด์นด์˜คํŽ˜์ด, ๋Œ€์•ˆ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ AI ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋ชจ๋ธ '์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€' ๊ตฌ์ถ• ์™„๋ฃŒ",
        "url": "https://www.newsis.com/view/NISX20260520_0003637276",
        "source": "ํ•œ๊ตญ๊ฒฝ์ œ",
        "author": "์ดํŽ˜์ด ๊ธฐ์ž",
        "published_date": "2026-05-20 10:15",
        "content": (
            "์นด์นด์˜คํŽ˜์ด๊ฐ€ ๋น…๋ฐ์ดํ„ฐ์™€ ๋จธ์‹ ๋Ÿฌ๋‹/๋”ฅ๋Ÿฌ๋‹์„ ์œตํ•ฉํ•˜์—ฌ ํ˜์‹ ์ ์ธ AI ๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€ ์‹œ์Šคํ…œ์ธ "
            "'์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€' ์†”๋ฃจ์…˜์„ ๊ฐœ๋ฐœ ๋ฐ ๊ตฌ์ถ•์„ ์™„๋ฃŒํ•˜๊ณ  ํ˜„์žฅ์— ์ ์šฉํ–ˆ๋‹ค.\n"
            "์ด ์‹œ์Šคํ…œ์€ ๊ธฐ์กด ์‹ ์šฉํ‰๊ฐ€์‚ฌ(CB)์˜ ์ด๋ ฅ ์ค‘์‹ฌ ํ‰๊ฐ€ ๋ชจ๋ธ์—์„œ ์†Œ์™ธ๋˜์—ˆ๋˜ ์ฒญ๋…„์ธต๊ณผ "
            "๊ธˆ์œต์ด๋ ฅ ๋ถ€์กฑ์ž(์”ฌํŒŒ์ผ๋Ÿฌ)๋“ค์„ ์œ„ํ•ด ์นด์นด์˜คํŽ˜์ด ํ”Œ๋žซํผ ๋‚ด ๊ฒฐ์ œ ํŒจํ„ด, ์†ก๊ธˆ ๋ฐ ์ง€์ถœ ์„ฑํ–ฅ, "
            "ํŽ˜์ด๋จธ๋‹ˆ ์ž”์•ก ๊ด€๋ฆฌ ์ถ”์ด ๋“ฑ ๋น„๊ธˆ์œต ๋Œ€์•ˆ ๋ฐ์ดํ„ฐ๋ฅผ ์ •๊ตํ•œ ๋”ฅ๋Ÿฌ๋‹๋ง์œผ๋กœ ๊ต์ฐจ ๋ถ„์„ํ•œ๋‹ค.\n"
            "AI ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋„์ž…์„ ํ†ตํ•ด ์”ฌํŒŒ์ผ๋Ÿฌ๋“ค์˜ ๋Œ€์ถœ ์Šน์ธ ์žฅ๋ฒฝ์€ 30% ์ด์ƒ ๋‚ฎ์ถ”๋Š” ํ•œํŽธ, "
            "AI์˜ ์ •ํ™•ํ•œ ๋ฆฌ์Šคํฌ ํ”„๋กœํŒŒ์ผ๋ง ๊ธฐ์ˆ ์„ ํ™œ์šฉํ•ด ์—ฐ์ฒด ๋ฐ ๊ธˆ์œต ๋ถ€์‹ค๋ฅ ์„ ํฌ๊ฒŒ ์–ต์ œํ•˜๋Š” ํšจ๊ณผ๋ฅผ ์ฆ๋ช…ํ–ˆ๋‹ค."
        ),
        "entities": [
            {"name": "์นด์นด์˜คํŽ˜์ด", "type": "AICompany", "description": "๋Œ€์•ˆ ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋ฐ ํ•€ํ…Œํฌ ํ˜์‹ ์„ ์ด๋„๋Š” ์ข…ํ•ฉ ๋ชจ๋ฐ”์ผ ๊ฒฐ์ œ ํ”Œ๋žซํผ"},
            {"name": "๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€", "type": "AITechnology", "description": "๋น„๊ธˆ์œต ๋Œ€์•ˆ ๋ฐ์ดํ„ฐ๋ฅผ ๋”ฅ๋Ÿฌ๋‹์œผ๋กœ ํ•™์Šตํ•˜์—ฌ ์‹ ์šฉ๋„๋ฅผ ์ธก์ •ํ•˜๋Š” ์ฐจ์„ธ๋Œ€ ์‹ ์šฉํ‰๊ฐ€ ๊ธฐ์ˆ "},
            {"name": "์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€", "type": "AIService", "description": "์”ฌํŒŒ์ผ๋Ÿฌ๋ฅผ ์œ„ํ•œ ๋”ฅ๋Ÿฌ๋‹ ๊ธฐ๋ฐ˜ ๋Œ€์•ˆ ๋Œ€์ถœ ์‹ฌ์‚ฌ ๊ณ ๋„ํ™” ์†”๋ฃจ์…˜"},
            {"name": "๋Œ€์ถœ์‹ฌ์‚ฌ", "type": "AIField", "description": "๋ฆฌ์Šคํฌ ํ”„๋กœํŒŒ์ผ๋ง ๋ฐ ํ•€ํ…Œํฌ ํ”Œ๋žซํผ ์—ฐ๊ณ„ ๊ธˆ์œต ์Šน์ธ ํ”„๋กœ์„ธ์Šค"}
        ],
        "relationships": [
            ("์นด์นด์˜คํŽ˜์ด", "DEVELOPS", "๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€"),
            ("์นด์นด์˜คํŽ˜์ด", "DEVELOPS", "์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€"),
            ("๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€", "APPLIES", "๋Œ€์ถœ์‹ฌ์‚ฌ"),
            ("์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€", "USED_IN", "๋Œ€์ถœ์‹ฌ์‚ฌ"),
            ("์นด์นด์˜คํŽ˜์ด", "PARTNERS_WITH", "ํ† ์Šค๋ฑ…ํฌ")  # ํฌ๋กœ์Šค ๋„๋ฉ”์ธ ์—ฐ๊ณ„
        ]
    },
    {
        "article_id": "ART_GOLD_003",
        "title": "ํ† ์Šค๋ฑ…ํฌ, ์ƒ์„ฑํ˜• AI ๊ฒฐํ•ฉํ•œ ๋ณด์ด์Šคํ”ผ์‹ฑ ์‹ค์‹œ๊ฐ„ ํƒ์ง€ ์‹œ์Šคํ…œ 'ํ† ์Šค AI FDS'๋กœ ๊ธˆ์œต ์‚ฌ๊ธฐ ์›์ฒœ ์ฐจ๋‹จ",
        "url": "https://www.dt.co.kr/article/12057506",
        "source": "๋งค์ผ๊ฒฝ์ œ",
        "author": "๋ฐ•ํ† ์Šค ๊ธฐ์ž",
        "published_date": "2026-05-20 11:30",
        "content": (
            "ํ† ์Šค๋ฑ…ํฌ๊ฐ€ ๊ธˆ์œต๊ถŒ ์ตœ์ดˆ๋กœ ์ด์ƒ๊ธˆ์œต๊ฑฐ๋ž˜ํƒ์ง€์‹œ์Šคํ…œ(FDS)์— ์ƒ์„ฑํ˜• AI ์—”์ง„์„ ์žฅ์ฐฉํ•œ "
            "'ํ† ์Šค AI FDS'๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๋Ÿฐ์นญํ•˜์—ฌ ๋ณด์ด์Šคํ”ผ์‹ฑ ๋ฐ ์Šค๋งˆํŠธ ํ”ผ์‹ฑ์„ ์›์ฒœ ์ฐจ๋‹จํ•˜๊ณ  ์žˆ๋‹ค.\n"
            "์ด ์‹œ์Šคํ…œ์€ ์‹ค์‹œ๊ฐ„์œผ๋กœ ๊ณ ์† ์œ ์ž…๋˜๋Š” ๋น„๋Œ€๋ฉด ๊ณ„์ขŒ ์ด์ฒด ๋ฐ ์›๊ฒฉ ์ œ์–ด ์•ฑ ๊ตฌ๋™ ๊ฑฐ๋ž˜ ๋‚ด์—ญ์„ "
            "์ดˆ๊ณ ์† ๋ถ„์„ํ•˜์—ฌ ๊ธˆ์œต์‚ฌ๊ธฐ ์ง•ํ›„๋ฅผ ์‹ค์‹œ๊ฐ„ ํƒ์ง€ํ•ด ๋‚ธ๋‹ค.\n"
            "ํ”ผ์‹ฑ ์˜์‹ฌ ๊ฑฐ๋ž˜๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด AI ์—”์ง„์ด ์ฆ‰์‹œ ํ•ด๋‹น ๊ณ„์ขŒ์˜ ์ด์ฒด๋ฅผ 0.1์ดˆ ๋‚ด๋กœ ๋™๊ฒฐ ์กฐ์น˜ํ•˜๊ณ , "
            "ํ”ผํ•ด์ž์—๊ฒŒ ์‹ค์‹œ๊ฐ„ ๊ธด๊ธ‰ ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€์™€ ๊ฐ€์ด๋“œ ์Œ์„ฑ์„ ์ƒ์„ฑํ˜• AI๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๋ฐœ์†กํ•œ๋‹ค.\n"
            "์ด๋ฅผ ํ†ตํ•ด ํ† ์Šค๋ฑ…ํฌ๋Š” ์ทจ์•ฝ๊ณ„์ธต์˜ ๋””์ง€ํ„ธ ๋ณด์ด์Šคํ”ผ์‹ฑ ํ”ผํ•ด ๋ฐœ์ƒ ๊ฑด์ˆ˜๋ฅผ ์˜ˆ๋…„ ๋Œ€๋น„ "
            "70% ์ด์ƒ ํš๊ธฐ์ ์œผ๋กœ ๋‚ฎ์ถ”๋Š” ์‚ฌํšŒ์  ํŒŒ๊ธ‰ ํšจ๊ณผ๋ฅผ ๊ฑฐ๋‘์—ˆ๋‹ค."
        ),
        "entities": [
            {"name": "ํ† ์Šค๋ฑ…ํฌ", "type": "AICompany", "description": "๋””์ง€ํ„ธ ๊ธˆ์œต์˜ ์žฅ๋ฒฝ์„ ๋‚ฎ์ถ”๊ณ  ๊ฐ•๋ ฅํ•œ FDS ์˜ˆ๋ฐฉ์ฑ…์„ ์ œ๊ณตํ•˜๋Š” ๋ชจ๋ฐ”์ผ ์ธํ„ฐ๋„ท์ „๋ฌธ์€ํ–‰"},
            {"name": "FDS", "type": "AITechnology", "description": "์‹ค์‹œ๊ฐ„ ๊ฑฐ๋ž˜ ํŒจํ„ด์˜ ๋น„์ •์ƒ ์œ ๋ฌด๋ฅผ AI๋กœ ํƒ์ง€ํ•˜๋Š” ์ด์ƒ๊ธˆ์œต๊ฑฐ๋ž˜ํƒ์ง€ ๊ธฐ์ˆ "},
            {"name": "ํ† ์Šค AI FDS", "type": "AIService", "description": "์ƒ์„ฑํ˜• AI ๊ธฐ๋ฐ˜ ๋ณด์ด์Šคํ”ผ์‹ฑ ๋ฐ ์›๊ฒฉ์ œ์–ด ์ฐจ๋‹จ ๊ฒฐํ•ฉ ๊ธˆ์œต ๋ณด์•ˆ ์‹œ์Šคํ…œ"},
            {"name": "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ", "type": "AIField", "description": "๋ณด์ด์Šคํ”ผ์‹ฑ ์ฐจ๋‹จ ๋ฐ ๋””์ง€ํ„ธ ๊ธˆ์œต ์•ˆ์‹ฌ ๊ฑฐ๋ž˜ ์„œ๋น„์Šค ๋ณด์•ˆ ์˜์—ญ"}
        ],
        "relationships": [
            ("ํ† ์Šค๋ฑ…ํฌ", "DEVELOPS", "FDS"),
            ("ํ† ์Šค๋ฑ…ํฌ", "DEVELOPS", "ํ† ์Šค AI FDS"),
            ("FDS", "APPLIES", "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ"),
            ("ํ† ์Šค AI FDS", "USED_IN", "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ"),
            ("ํ† ์Šค๋ฑ…ํฌ", "PARTNERS_WITH", "์‹ ํ•œ์€ํ–‰")  # ํฌ๋กœ์Šค ๋„๋ฉ”์ธ ์—ฐ๊ณ„
        ]
    },
    {
        "article_id": "ART_GOLD_004",
        "title": "๋„ค์ด๋ฒ„ํŽ˜์ด, ๋งˆ์ด๋ฐ์ดํ„ฐ์™€ ์ดˆ๊ฑฐ๋Œ€ AI ๊ฒฐํ•ฉํ•œ ๊ฐœ์ธ ๋งž์ถคํ˜• '๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ' ์ถœ์‹œ",
        "url": "https://www.thevaluenews.co.kr/news/view.php?idx=198871",
        "source": "๋””์ง€ํ„ธ๋ฐ์ผ๋ฆฌ",
        "author": "์ตœ๋ฐ์ดํ„ฐ ๊ธฐ์ž",
        "published_date": "2026-05-20 14:00",
        "content": (
            "๋„ค์ด๋ฒ„ํŽ˜์ด๊ฐ€ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ธํ”„๋ผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ตญ๋‚ด ์ตœ๊ณ ์˜ ์ดˆ๊ฑฐ๋Œ€ ์–ธ์–ด๋ชจ๋ธ์„ ๊ฒฐํ•ฉํ•œ "
            "์Šค๋งˆํŠธ ์ž์‚ฐ ๋ถ„์„ ์ฑ—๋ด‡ ์„œ๋น„์Šค์ธ '๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ'๋ฅผ ์ •์‹ ์ถœ์‹œํ–ˆ๋‹ค.\n"
            "์ด ํ”Œ๋žซํผ์€ ํฉ์–ด์ง„ ๊ณ ๊ฐ์˜ ์€ํ–‰, ์นด๋“œ์‚ฌ, ์ฆ๊ถŒ์‚ฌ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ •๋ณด๋ฅผ ํ•œ๋ฐ ๋ชจ์€ ๋’ค "
            "๊ฐœ๊ฐœ์ธ์˜ ์†Œ๋น„ ํ˜„ํ™ฉ ๋ถ„์„, ์ง€์ถœ ๋‹ค์ด์–ดํŠธ ๊ฐ€์ด๋“œ, ์ตœ์ ์˜ ๊ธˆ์œต ์ƒํ’ˆ ๊ธˆ๋ฆฌ ๋น„๊ต ํ˜œํƒ์„ ์ œ๊ณตํ•œ๋‹ค.\n"
            "์ดˆ๊ฑฐ๋Œ€ AI ๊ธฐ์ˆ ์ด ์ ‘๋ชฉ๋˜์–ด ๋‹จ์ˆœ ์ˆซ์ž ๋‚˜์—ด์— ๊ทธ์ณค๋˜ ๊ธฐ์กด ๋งˆ์ด๋ฐ์ดํ„ฐ ๋ถ„์„ ํ‹€์„ ๋ฒ—์–ด๋‚˜ "
            "์ ˆ์„ธ ๋น„๋ฒ•์ด๋‚˜ ์ด์ž ์ ˆ์•ฝ ๊ฐ€์ด๋“œ๋ฅผ ์นœ๊ทผํ•œ ๋ฉ”์‹ ์ € ๋Œ€ํ™” ํ˜•ํƒœ๋กœ 24์‹œ๊ฐ„ ์ƒ๋‹ด ๋ธŒ๋ฆฌํ•‘ํ•ด ์ค€๋‹ค.\n"
            "์ด๋กœ์จ ๋„ค์ด๋ฒ„ํŽ˜์ด๋Š” ๊ณ ๋„ํ™”๋œ ์ดˆ์ •๋ฐ€ ๋งˆ์ด๋ฐ์ดํ„ฐ AI ์ž์‚ฐ ์ถ”์ฒœ ํ”Œ๋žซํผ์œผ๋กœ ํ•œ ๋‹จ๊ณ„ ๋„์•ฝํ–ˆ๋‹ค."
        ),
        "entities": [
            {"name": "๋„ค์ด๋ฒ„ํŽ˜์ด", "type": "AICompany", "description": "์ง€์ถœ ๋ถ„์„ ๋ฐ ๊ธˆ์œต ์ถ”์ฒœ ๋“ฑ ๋””์ง€ํ„ธ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ƒํƒœ๊ณ„๋ฅผ ์„ ๋„ํ•˜๋Š” ์ข…ํ•ฉ ๊ธˆ์œต ํ”Œ๋žซํผ"},
            {"name": "๋งˆ์ด๋ฐ์ดํ„ฐ", "type": "AITechnology", "description": "๋ถ„์‚ฐ๋œ ๊ธˆ์œต ๊ธฐ๊ด€ ์ •๋ณด๋ฅผ ํ•œ๋ฐ ๋ชจ์•„ ๊ฐ€์น˜๋ฅผ ๋ถ„์„ํ•˜๋Š” ์ข…ํ•ฉ ๊ธˆ์œต ์ž์‚ฐ ๋ฐ์ดํ„ฐ ๊ธฐ์ˆ "},
            {"name": "๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ", "type": "AIService", "description": "์ดˆ๊ฑฐ๋Œ€ LLM์„ ๋งˆ์ด๋ฐ์ดํ„ฐ์™€ ๊ฒฐํ•ฉํ•˜์—ฌ ๋Œ€ํ™”ํ˜• ์ƒ๋‹ด์„ ์ œ๊ณตํ•˜๋Š” ์ž์‚ฐ ์ปจ์„คํ„ดํŠธ ์„œ๋น„์Šค"},
            {"name": "๋””์ง€ํ„ธ๊ธˆ์œต", "type": "AIField", "description": "ํ•€ํ…Œํฌ ์—ฐ๊ณ„ ๊ฐœ์ธ ์ง€์ถœ ๋‹ค์ด์–ดํŠธ ๋ฐ ๋งž์ถค ์ƒํ’ˆ ๋น„๊ต ์ถ”์ฒœ ํ˜์‹  ์˜์—ญ"}
        ],
        "relationships": [
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "DEVELOPS", "๋งˆ์ด๋ฐ์ดํ„ฐ"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "DEVELOPS", "๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ"),
            ("๋งˆ์ด๋ฐ์ดํ„ฐ", "APPLIES", "๋””์ง€ํ„ธ๊ธˆ์œต"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ", "USED_IN", "๋””์ง€ํ„ธ๊ธˆ์œต"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "PARTNERS_WITH", "์‹ ํ•œ์€ํ–‰")  # ํฌ๋กœ์Šค ๋„๋ฉ”์ธ ์—ฐ๊ณ„
        ]
    }
]


def _load_article(session, art):
    """Merge one article, its paragraph chunks (with embeddings) and its entities.

    ``session`` is an open Neo4j session; ``art`` is one entry of GOLD_ARTICLES.
    Entity-to-entity relationships are not created here: they may point at
    entities that belong to an article which has not been loaded yet.
    """
    # 1. Article node (MERGE, so re-running does not duplicate it).
    session.run("""
        MERGE (a:Article {article_id: $article_id})
        SET a.title = $title,
            a.url = $url,
            a.content = $content,
            a.source = $source,
            a.author = $author,
            a.published_date = $published_date,
            a.category = '๊ฒฝ์ œ'
    """, {
        "article_id": art["article_id"],
        "title": art["title"],
        "url": art["url"],
        "content": art["content"],
        "source": art["source"],
        "author": art["author"],
        "published_date": art["published_date"]
    })

    # 2. Content chunk nodes with their embeddings, linked via HAS_CHUNK.
    print("  -> ์‹ค์‹œ๊ฐ„ OpenAI 1536์ฐจ์› ๋ฒกํ„ฐ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ ์ค‘...")
    # One chunk per non-empty line of the article body.
    paragraphs = [p.strip() for p in art["content"].split("\n") if p.strip()]
    for chunk_idx, para in enumerate(paragraphs, 1):
        chunk_id = f"{art['article_id']}_CHK_{chunk_idx}"
        embedding = get_embedding(para)

        session.run("""
            MATCH (a:Article {article_id: $article_id})
            MERGE (c:Content {chunk_id: $chunk_id})
            SET c.chunk = $chunk,
                c.embedding = $embedding,
                c.article_id = $article_id
            MERGE (a)-[:HAS_CHUNK]->(c)
        """, {
            "article_id": art["article_id"],
            "chunk_id": chunk_id,
            "chunk": para,
            "embedding": embedding
        })

    # 3. Entity nodes and Article -[:MENTIONS]-> Entity edges.
    for ent in art["entities"]:
        # The node label comes from the entity's "type" field. Labels cannot be
        # passed as Cypher parameters, hence the f-string; the values originate
        # from the GOLD_ARTICLES constant, not from external input.
        cypher_merge = f"""
            MERGE (e:{ent['type']} {{name: $name}})
            SET e.description = $description
            RETURN e
        """
        session.run(cypher_merge, {"name": ent["name"], "description": ent["description"]})

        session.run(f"""
            MATCH (a:Article {{article_id: $article_id}})
            MATCH (e:{ent['type']} {{name: $name}})
            MERGE (a)-[:MENTIONS]->(e)
        """, {"article_id": art["article_id"], "name": ent["name"]})

        print(f"    - [ENT] ({ent['type']}) {ent['name']} ์™„๋ฃŒ")


def _link_entities(session, art, entity_types):
    """Create the direct entity-to-entity relationships declared by one article.

    ``entity_types`` maps entity name -> label; names missing from it fall back
    to the ``AICompany`` label. An edge is only reported as created when both
    endpoint nodes were actually found.
    """
    for src_name, rel_type, tgt_name in art["relationships"]:
        src_type = entity_types.get(src_name, "AICompany")
        tgt_type = entity_types.get(tgt_name, "AICompany")

        cypher_rel = f"""
            MATCH (s:{src_type} {{name: $src_name}})
            MATCH (t:{tgt_type} {{name: $tgt_name}})
            MERGE (s)-[r:{rel_type}]->(t)
            RETURN count(r) AS linked
        """
        # count() yields a single row even when the MATCH clauses find nothing,
        # so 0 means an endpoint was missing and no edge was merged.
        linked = session.run(
            cypher_rel, {"src_name": src_name, "tgt_name": tgt_name}
        ).single()["linked"]
        if linked:
            print(f"    - [REL] ({src_name})-[:{rel_type}]->({tgt_name}) ์—ฐ๊ฒฐ")
        else:
            print(
                f"    - [WARN] ({src_name})-[:{rel_type}]->({tgt_name}) "
                "skipped: endpoint node not found"
            )


def main():
    """Wipe the graph and load GOLD_ARTICLES into Neo4j, then print statistics.

    Steps: connect, delete every existing node and relationship, merge each
    article with its chunks and entities, create the entity-to-entity
    relationships once all entities exist, and print the relationship density.
    The driver is closed even when a step raises.
    """
    print("[INIT] Neo4j AuraDB ๋“œ๋ผ์ด๋ฒ„ ์ดˆ๊ธฐํ™” ๋ฐ ์—ฐ๊ฒฐ ์‹œ๋„...")
    driver = get_neo4j_driver()

    print("[INIT] [OK] Neo4j ์—ฐ๊ฒฐ ๋ฌด๊ฒฐ์„ฑ ๊ฒ€์ฆ ํ†ต๊ณผ")

    try:
        with driver.session() as session:
            # Start from a clean slate: this removes EVERY node and relationship
            # in the database, not only data written by this script.
            print("[RESET] ๊ธฐ์กด ๊ทธ๋ž˜ํ”„ ๋ฐ์ดํ„ฐ๋ฅผ ๊นจ๋—ํ•˜๊ฒŒ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค (DETACH DELETE)...")
            session.run("MATCH (n) DETACH DELETE n")
            print("[RESET] [OK] ๊ธฐ์กด ๋ฐ์ดํ„ฐ ์™„์ „ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")

            print("[LOAD] 4๋Œ€ ํ•€ํ…Œํฌ ๊ณจ๋“œ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ ์žฌ ํ”„๋กœ์„ธ์Šค๋ฅผ ๊ฐ€๋™ํ•ฉ๋‹ˆ๋‹ค...")

            # Name -> label lookup across all articles, so a relationship can
            # resolve the label of an entity declared by a different article.
            entity_types = {
                e["name"]: e["type"]
                for a in GOLD_ARTICLES
                for e in a["entities"]
            }

            # Pass 1: articles, chunks and entities.
            for idx, art in enumerate(GOLD_ARTICLES, 1):
                print(f"\n({idx}/{len(GOLD_ARTICLES)}) [ART] '{art['title'][:35]}...' ์ ์žฌ ์ค‘...")
                _load_article(session, art)

            # Pass 2: entity-to-entity relationships. This must run after ALL
            # entities exist; linking inside pass 1 silently dropped edges whose
            # target entity is introduced by a later article.
            for art in GOLD_ARTICLES:
                _link_entities(session, art, entity_types)

            # Relationship-density statistics.
            print("\n[OK] 4๋Œ€ ํ•€ํ…Œํฌ ๊ณจ๋“œ ๋ฐ์ดํ„ฐ ์ ์žฌ ์™„๋ฃŒ!")

            total_rels = session.run("""
                MATCH ()-[r:DEVELOPS|INVESTS_IN|PARTNERS_WITH|APPLIES|USED_IN|RELATED_TO]->() 
                RETURN count(r) as cnt
            """).single()["cnt"]

            total_articles = session.run("MATCH (a:Article) RETURN count(a) as cnt").single()["cnt"]
            avg_density = total_rels / total_articles if total_articles > 0 else 0

            print(f"[STATUS] ํ˜„์žฌ ์ ์žฌ๋œ ์ด ๊ธฐ์‚ฌ ์ˆ˜: {total_articles}๊ฐœ")
            print(f"[STATUS] ์—”ํ‹ฐํ‹ฐ ๊ฐ„ ์ง์ ‘ ๊ด€๊ณ„ ์ด์ˆ˜: {total_rels}๊ฐœ")
            print(f"[STATUS] ๊ธฐ์‚ฌ๋‹น ํ‰๊ท  ๊ด€๊ณ„์ˆ˜: {avg_density:.1f}๊ฐœ (๋ชฉํ‘œ: 3.0๊ฐœ ์ด์ƒ)")
    finally:
        # Release the connection pool whether or not loading succeeded.
        driver.close()

    print("[DONE] ํ”„๋กœ์„ธ์Šค ์ •์ƒ ์ข…๋ฃŒ")


# Run the loader only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()