# FinGraph / scripts / inject_fintech_gold_data.py
# dev-yuje's picture
# refactor: clean up root directory by removing cache, old data, and moving scripts
# 080ff86
# -*- coding: utf-8 -*-
"""
inject_fintech_gold_data.py โ€” ํ•€ํ…Œํฌ/๊ธˆ์œต AI ๊ณจ๋“œ ๋ฐ์ดํ„ฐ ์ฃผ์ž… ์Šคํฌ๋ฆฝํŠธ
================================================================
์ž‘์„ฑ์ผ: 2026-05-20
์ €์ž‘๊ถŒ: (c) 2026 FinGraph Team All Rights Reserved.
๋ณธ ์Šคํฌ๋ฆฝํŠธ๋Š” ์ฑ—๋ด‡์˜ ์ฃผ์ œ๋ฅผ 100% ๊ธˆ์œต/ํ•€ํ…Œํฌ AI ์ „๋ฌธ ๋„๋ฉ”์ธ์œผ๋กœ ์—„๊ฒฉ ๊ฐœํŽธํ•˜๊ธฐ ์œ„ํ•ด,
์‹ค์ œ ๋™์ž‘์„ ๋ณด์žฅํ•˜๋Š” 4๋Œ€ ์‹œ๋‚˜๋ฆฌ์˜ค ๋งž์ถคํ˜• ๊ธˆ์œต ๋‰ด์Šค ๊ธฐ์‚ฌ, ์—”ํ‹ฐํ‹ฐ, ์ฒญํ‚น ๋ฐ์ดํ„ฐ ๋ฐ
1536์ฐจ์› ๋ฒกํ„ฐ ์ž„๋ฒ ๋”ฉ์„ Neo4j AuraDB์— ์‹ค์‹œ๊ฐ„์œผ๋กœ ์ƒ์„ฑํ•˜์—ฌ ์™„๋ฒฝํ•˜๊ฒŒ ์ ์žฌํ•ฉ๋‹ˆ๋‹ค.
"""
import os
import sys
import dotenv
import neo4j
from openai import OpenAI
# Pull settings from a .env file into the environment before anything reads them.
dotenv.load_dotenv()

# Switch stdout to UTF-8 (needed for the Windows console); streams that do not
# offer reconfigure() are left untouched.
_stdout_reconfigure = getattr(sys.stdout, "reconfigure", None)
if _stdout_reconfigure is not None:
    _stdout_reconfigure(encoding='utf-8')
def _open_verified_driver(uri: str, auth: tuple) -> neo4j.Driver:
    """Create a driver for *uri*/*auth* and return it only if it can connect.

    The driver is closed before the error is re-raised when the connectivity
    check fails, so a failed attempt does not leave a driver open.
    """
    driver = neo4j.GraphDatabase.driver(uri, auth=auth)
    try:
        driver.verify_connectivity()
    except Exception:
        driver.close()
        raise
    return driver


def get_neo4j_driver() -> neo4j.Driver:
    """Build a verified Neo4j driver, preferring Client ID/Secret credentials.

    Environment variables read:
        NEO4J_URI            (default "neo4j://localhost:7687")
        NEO4J_CLIENT_ID / NEO4J_CLIENT_SECRET   (tried first when both are set)
        NEO4J_USERNAME       (default "neo4j")
        NEO4J_PASSWORD       (default "password")

    Returns:
        A driver whose ``verify_connectivity()`` call has succeeded.

    Raises:
        Whatever the neo4j driver raises when the username/password attempt
        fails. A failure of the client-credential attempt is reported on
        stderr and triggers the fallback instead of propagating.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        try:
            return _open_verified_driver(uri, (client_id, client_secret))
        except Exception as exc:
            # Best-effort attempt: report why it failed, then fall back to
            # username/password instead of aborting.
            print(
                "[WARN] Neo4j client-credential login failed "
                f"({type(exc).__name__}); falling back to username/password.",
                file=sys.stderr,
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    return _open_verified_driver(uri, (username, password))
# Set up the OpenAI API client. Without an API key the script prints a failure
# message and exits with status 1.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    print("[FAIL] OPENAI_API_KEY ํ™˜๊ฒฝ ๋ณ€์ˆ˜๊ฐ€ ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    raise SystemExit(1)

client = OpenAI(api_key=api_key)
def get_embedding(text: str) -> list[float]:
    """Request a text-embedding-3-small embedding for *text* and return it.

    Newlines in *text* are replaced by single spaces before the request is
    sent through the module-level OpenAI ``client``. The original author
    documents the resulting vector as 1536-dimensional.
    """
    single_line = " ".join(text.split("\n"))
    result = client.embeddings.create(
        model="text-embedding-3-small",
        input=[single_line],
    )
    first_item = result.data[0]
    return first_item.embedding
# Specification of the four fintech / financial-AI gold articles.
#
# Each record is a dict with these keys:
#   article_id, title, url, source, author, published_date - article metadata
#   content        - article body; "\n" separates the pieces that main() splits on
#   entities       - list of {"name", "type", "description"} dicts; "type" is one
#                    of AICompany / AITechnology / AIService / AIField
#   relationships  - list of (source entity name, relationship type, target entity name)
#
# The last relationship of every record points at an entity that is declared in
# a different record's "entities" list (cross-article link).
GOLD_ARTICLES = [
    {
        "article_id": "ART_GOLD_001",
        "title": "์‹ ํ•œ์€ํ–‰, ์ƒ์„ฑํ˜• AI ํƒ‘์žฌ ์ฐจ์„ธ๋Œ€ ๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ € '์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค' ์ „๊ฒฉ ์ถœ์‹œ",
        "url": "https://magazine.hankyung.com/business/article/202604165507b",
        "source": "์—ฐํ•ฉ๋‰ด์Šค",
        "author": "๊น€๊ธˆ์œต ๊ธฐ์ž",
        "published_date": "2026-05-20 09:00",
        "content": (
            "์‹ ํ•œ์€ํ–‰์ด ์ƒ์„ฑํ˜• AI ๊ธฐ์ˆ ์„ ๊ฒฐํ•ฉํ•˜์—ฌ ๊ฐœ์ธ ๋งž์ถคํ˜• ์ž์‚ฐ๊ด€๋ฆฌ ์„œ๋น„์Šค๋ฅผ ๋Œ€ํญ ๊ฐ•ํ™”ํ•œ "
            "์ฐจ์„ธ๋Œ€ ๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ € ์†”๋ฃจ์…˜ '์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค'๋ฅผ ๊ณต์‹ ์ถœ์‹œํ–ˆ๋‹ค.\n"
            "์ด๋ฒˆ ์„œ๋น„์Šค๋Š” ์‹ค์‹œ๊ฐ„ ๊ธˆ์œต ์‹œ์žฅ ๋น…๋ฐ์ดํ„ฐ์™€ ๊ณ ๊ฐ์˜ ํˆฌ์ž ์„ฑํ–ฅ์„ ๋‹ค์ฐจ์› ๋ถ„์„ํ•˜๋Š” "
            "AI ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•˜๋ฉฐ, ์ž์‚ฐ ๋ฐฐ๋ถ„ ๋น„์ค‘์„ ๋™์ ์œผ๋กœ ์žฌ์กฐ์ •(๋ฆฌ๋ฐธ๋Ÿฐ์‹ฑ)ํ•ด ์ค€๋‹ค.\n"
            "ํŠนํžˆ ์ดˆ๊ฑฐ๋Œ€ ์–ธ์–ด๋ชจ๋ธ(LLM)์ด ์ ์šฉ๋˜์–ด ๋”ฑ๋”ฑํ•˜๊ณ  ์–ด๋ ค์šด ํˆฌ์ž ๋ณด๊ณ ์„œ๋ฅผ ์ž์—ฐ์–ด ํ˜•ํƒœ์˜ "
            "์นœ์ ˆํ•œ ์ž์‚ฐ ์ข…ํ•ฉ ๋ธŒ๋ฆฌํ•‘ ๋ณด๊ณ ์„œ๋กœ ์ž๋™ ์š”์•ฝํ•˜์—ฌ ์ „๋‹ฌํ•˜๋Š” ํ˜์‹ ์„ ์ด๋ค„๋ƒˆ๋‹ค.\n"
            "๊ธˆ์œต ์†Œ๋น„์ž๋“ค์€ ์‹ ํ•œ ์ (SOL) ๋ฑ…ํ‚น ์•ฑ์„ ํ†ตํ•ด ๊ฐ„ํŽธํ•˜๊ฒŒ ํฌํŠธํด๋ฆฌ์˜ค ์ œ์•ˆ์„ ๋ฐ›๊ณ  "
            "๋””์ง€ํ„ธ ์ž์‚ฐ ๊ด€๋ฆฌ๋ฅผ ๊ฒฝํ—˜ํ•  ์ˆ˜ ์žˆ๋‹ค."
        ),
        "entities": [
            {"name": "์‹ ํ•œ์€ํ–‰", "type": "AICompany", "description": "์ƒ์„ฑํ˜• AI ์ž์‚ฐ๊ด€๋ฆฌ ๋ฐ ๊ธˆ์œต ํ…Œํฌ๋ฅผ ์„ ๋„ํ•˜๋Š” ์‹œ์ค‘์€ํ–‰"},
            {"name": "๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €", "type": "AITechnology", "description": "์•Œ๊ณ ๋ฆฌ์ฆ˜ ๊ธฐ๋ฐ˜ ๊ฐœ์ธ ๋งž์ถคํ˜• ํˆฌ์ž ํฌํŠธํด๋ฆฌ์˜ค ๊ตฌ์„ฑ ๊ธฐ์ˆ "},
            {"name": "์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค", "type": "AIService", "description": "์ƒ์„ฑํ˜• AI ๊ฒฐํ•ฉ ์ฐจ์„ธ๋Œ€ ๋ชจ๋ฐ”์ผ ์ž์‚ฐ๊ด€๋ฆฌ ์†”๋ฃจ์…˜"},
            {"name": "์ž์‚ฐ๊ด€๋ฆฌ", "type": "AIField", "description": "๋””์ง€ํ„ธ ๊ธฐ์ˆ ๊ณผ ๋งˆ์ด๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜์˜ ๋งž์ถคํ˜• ๊ฐœ์ธ ๊ธˆ์œต ์„œ๋น„์Šค"}
        ],
        "relationships": [
            ("์‹ ํ•œ์€ํ–‰", "DEVELOPS", "๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €"),
            ("์‹ ํ•œ์€ํ–‰", "DEVELOPS", "์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค"),
            ("๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €", "APPLIES", "์ž์‚ฐ๊ด€๋ฆฌ"),
            ("์‹ ํ•œ AI ์  ํฌํŠธํด๋ฆฌ์˜ค", "USED_IN", "์ž์‚ฐ๊ด€๋ฆฌ"),
            ("์‹ ํ•œ์€ํ–‰", "PARTNERS_WITH", "์นด์นด์˜คํŽ˜์ด")  # cross-domain link; target is declared in ART_GOLD_002
        ]
    },
    {
        "article_id": "ART_GOLD_002",
        "title": "์นด์นด์˜คํŽ˜์ด, ๋Œ€์•ˆ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜ AI ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋ชจ๋ธ '์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€' ๊ตฌ์ถ• ์™„๋ฃŒ",
        "url": "https://www.newsis.com/view/NISX20260520_0003637276",
        "source": "ํ•œ๊ตญ๊ฒฝ์ œ",
        "author": "์ดํŽ˜์ด ๊ธฐ์ž",
        "published_date": "2026-05-20 10:15",
        "content": (
            "์นด์นด์˜คํŽ˜์ด๊ฐ€ ๋น…๋ฐ์ดํ„ฐ์™€ ๋จธ์‹ ๋Ÿฌ๋‹/๋”ฅ๋Ÿฌ๋‹์„ ์œตํ•ฉํ•˜์—ฌ ํ˜์‹ ์ ์ธ AI ๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€ ์‹œ์Šคํ…œ์ธ "
            "'์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€' ์†”๋ฃจ์…˜์„ ๊ฐœ๋ฐœ ๋ฐ ๊ตฌ์ถ•์„ ์™„๋ฃŒํ•˜๊ณ  ํ˜„์žฅ์— ์ ์šฉํ–ˆ๋‹ค.\n"
            "์ด ์‹œ์Šคํ…œ์€ ๊ธฐ์กด ์‹ ์šฉํ‰๊ฐ€์‚ฌ(CB)์˜ ์ด๋ ฅ ์ค‘์‹ฌ ํ‰๊ฐ€ ๋ชจ๋ธ์—์„œ ์†Œ์™ธ๋˜์—ˆ๋˜ ์ฒญ๋…„์ธต๊ณผ "
            "๊ธˆ์œต์ด๋ ฅ ๋ถ€์กฑ์ž(์”ฌํŒŒ์ผ๋Ÿฌ)๋“ค์„ ์œ„ํ•ด ์นด์นด์˜คํŽ˜์ด ํ”Œ๋žซํผ ๋‚ด ๊ฒฐ์ œ ํŒจํ„ด, ์†ก๊ธˆ ๋ฐ ์ง€์ถœ ์„ฑํ–ฅ, "
            "ํŽ˜์ด๋จธ๋‹ˆ ์ž”์•ก ๊ด€๋ฆฌ ์ถ”์ด ๋“ฑ ๋น„๊ธˆ์œต ๋Œ€์•ˆ ๋ฐ์ดํ„ฐ๋ฅผ ์ •๊ตํ•œ ๋”ฅ๋Ÿฌ๋‹๋ง์œผ๋กœ ๊ต์ฐจ ๋ถ„์„ํ•œ๋‹ค.\n"
            "AI ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋„์ž…์„ ํ†ตํ•ด ์”ฌํŒŒ์ผ๋Ÿฌ๋“ค์˜ ๋Œ€์ถœ ์Šน์ธ ์žฅ๋ฒฝ์€ 30% ์ด์ƒ ๋‚ฎ์ถ”๋Š” ํ•œํŽธ, "
            "AI์˜ ์ •ํ™•ํ•œ ๋ฆฌ์Šคํฌ ํ”„๋กœํŒŒ์ผ๋ง ๊ธฐ์ˆ ์„ ํ™œ์šฉํ•ด ์—ฐ์ฒด ๋ฐ ๊ธˆ์œต ๋ถ€์‹ค๋ฅ ์„ ํฌ๊ฒŒ ์–ต์ œํ•˜๋Š” ํšจ๊ณผ๋ฅผ ์ฆ๋ช…ํ–ˆ๋‹ค."
        ),
        "entities": [
            {"name": "์นด์นด์˜คํŽ˜์ด", "type": "AICompany", "description": "๋Œ€์•ˆ ๋Œ€์ถœ ์‹ฌ์‚ฌ ๋ฐ ํ•€ํ…Œํฌ ํ˜์‹ ์„ ์ด๋„๋Š” ์ข…ํ•ฉ ๋ชจ๋ฐ”์ผ ๊ฒฐ์ œ ํ”Œ๋žซํผ"},
            {"name": "๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€", "type": "AITechnology", "description": "๋น„๊ธˆ์œต ๋Œ€์•ˆ ๋ฐ์ดํ„ฐ๋ฅผ ๋”ฅ๋Ÿฌ๋‹์œผ๋กœ ํ•™์Šตํ•˜์—ฌ ์‹ ์šฉ๋„๋ฅผ ์ธก์ •ํ•˜๋Š” ์ฐจ์„ธ๋Œ€ ์‹ ์šฉํ‰๊ฐ€ ๊ธฐ์ˆ "},
            {"name": "์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€", "type": "AIService", "description": "์”ฌํŒŒ์ผ๋Ÿฌ๋ฅผ ์œ„ํ•œ ๋”ฅ๋Ÿฌ๋‹ ๊ธฐ๋ฐ˜ ๋Œ€์•ˆ ๋Œ€์ถœ ์‹ฌ์‚ฌ ๊ณ ๋„ํ™” ์†”๋ฃจ์…˜"},
            {"name": "๋Œ€์ถœ์‹ฌ์‚ฌ", "type": "AIField", "description": "๋ฆฌ์Šคํฌ ํ”„๋กœํŒŒ์ผ๋ง ๋ฐ ํ•€ํ…Œํฌ ํ”Œ๋žซํผ ์—ฐ๊ณ„ ๊ธˆ์œต ์Šน์ธ ํ”„๋กœ์„ธ์Šค"}
        ],
        "relationships": [
            ("์นด์นด์˜คํŽ˜์ด", "DEVELOPS", "๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€"),
            ("์นด์นด์˜คํŽ˜์ด", "DEVELOPS", "์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€"),
            ("๋Œ€์•ˆ์‹ ์šฉํ‰๊ฐ€", "APPLIES", "๋Œ€์ถœ์‹ฌ์‚ฌ"),
            ("์นด์นด์˜คํŽ˜์ด AI ์‹ ์šฉํ‰๊ฐ€", "USED_IN", "๋Œ€์ถœ์‹ฌ์‚ฌ"),
            ("์นด์นด์˜คํŽ˜์ด", "PARTNERS_WITH", "ํ† ์Šค๋ฑ…ํฌ")  # cross-domain link; target is declared in ART_GOLD_003
        ]
    },
    {
        "article_id": "ART_GOLD_003",
        "title": "ํ† ์Šค๋ฑ…ํฌ, ์ƒ์„ฑํ˜• AI ๊ฒฐํ•ฉํ•œ ๋ณด์ด์Šคํ”ผ์‹ฑ ์‹ค์‹œ๊ฐ„ ํƒ์ง€ ์‹œ์Šคํ…œ 'ํ† ์Šค AI FDS'๋กœ ๊ธˆ์œต ์‚ฌ๊ธฐ ์›์ฒœ ์ฐจ๋‹จ",
        "url": "https://www.dt.co.kr/article/12057506",
        "source": "๋งค์ผ๊ฒฝ์ œ",
        "author": "๋ฐ•ํ† ์Šค ๊ธฐ์ž",
        "published_date": "2026-05-20 11:30",
        "content": (
            "ํ† ์Šค๋ฑ…ํฌ๊ฐ€ ๊ธˆ์œต๊ถŒ ์ตœ์ดˆ๋กœ ์ด์ƒ๊ธˆ์œต๊ฑฐ๋ž˜ํƒ์ง€์‹œ์Šคํ…œ(FDS)์— ์ƒ์„ฑํ˜• AI ์—”์ง„์„ ์žฅ์ฐฉํ•œ "
            "'ํ† ์Šค AI FDS'๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๋Ÿฐ์นญํ•˜์—ฌ ๋ณด์ด์Šคํ”ผ์‹ฑ ๋ฐ ์Šค๋งˆํŠธ ํ”ผ์‹ฑ์„ ์›์ฒœ ์ฐจ๋‹จํ•˜๊ณ  ์žˆ๋‹ค.\n"
            "์ด ์‹œ์Šคํ…œ์€ ์‹ค์‹œ๊ฐ„์œผ๋กœ ๊ณ ์† ์œ ์ž…๋˜๋Š” ๋น„๋Œ€๋ฉด ๊ณ„์ขŒ ์ด์ฒด ๋ฐ ์›๊ฒฉ ์ œ์–ด ์•ฑ ๊ตฌ๋™ ๊ฑฐ๋ž˜ ๋‚ด์—ญ์„ "
            "์ดˆ๊ณ ์† ๋ถ„์„ํ•˜์—ฌ ๊ธˆ์œต์‚ฌ๊ธฐ ์ง•ํ›„๋ฅผ ์‹ค์‹œ๊ฐ„ ํƒ์ง€ํ•ด ๋‚ธ๋‹ค.\n"
            "ํ”ผ์‹ฑ ์˜์‹ฌ ๊ฑฐ๋ž˜๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด AI ์—”์ง„์ด ์ฆ‰์‹œ ํ•ด๋‹น ๊ณ„์ขŒ์˜ ์ด์ฒด๋ฅผ 0.1์ดˆ ๋‚ด๋กœ ๋™๊ฒฐ ์กฐ์น˜ํ•˜๊ณ , "
            "ํ”ผํ•ด์ž์—๊ฒŒ ์‹ค์‹œ๊ฐ„ ๊ธด๊ธ‰ ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€์™€ ๊ฐ€์ด๋“œ ์Œ์„ฑ์„ ์ƒ์„ฑํ˜• AI๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๋ฐœ์†กํ•œ๋‹ค.\n"
            "์ด๋ฅผ ํ†ตํ•ด ํ† ์Šค๋ฑ…ํฌ๋Š” ์ทจ์•ฝ๊ณ„์ธต์˜ ๋””์ง€ํ„ธ ๋ณด์ด์Šคํ”ผ์‹ฑ ํ”ผํ•ด ๋ฐœ์ƒ ๊ฑด์ˆ˜๋ฅผ ์˜ˆ๋…„ ๋Œ€๋น„ "
            "70% ์ด์ƒ ํš๊ธฐ์ ์œผ๋กœ ๋‚ฎ์ถ”๋Š” ์‚ฌํšŒ์  ํŒŒ๊ธ‰ ํšจ๊ณผ๋ฅผ ๊ฑฐ๋‘์—ˆ๋‹ค."
        ),
        "entities": [
            {"name": "ํ† ์Šค๋ฑ…ํฌ", "type": "AICompany", "description": "๋””์ง€ํ„ธ ๊ธˆ์œต์˜ ์žฅ๋ฒฝ์„ ๋‚ฎ์ถ”๊ณ  ๊ฐ•๋ ฅํ•œ FDS ์˜ˆ๋ฐฉ์ฑ…์„ ์ œ๊ณตํ•˜๋Š” ๋ชจ๋ฐ”์ผ ์ธํ„ฐ๋„ท์ „๋ฌธ์€ํ–‰"},
            {"name": "FDS", "type": "AITechnology", "description": "์‹ค์‹œ๊ฐ„ ๊ฑฐ๋ž˜ ํŒจํ„ด์˜ ๋น„์ •์ƒ ์œ ๋ฌด๋ฅผ AI๋กœ ํƒ์ง€ํ•˜๋Š” ์ด์ƒ๊ธˆ์œต๊ฑฐ๋ž˜ํƒ์ง€ ๊ธฐ์ˆ "},
            {"name": "ํ† ์Šค AI FDS", "type": "AIService", "description": "์ƒ์„ฑํ˜• AI ๊ธฐ๋ฐ˜ ๋ณด์ด์Šคํ”ผ์‹ฑ ๋ฐ ์›๊ฒฉ์ œ์–ด ์ฐจ๋‹จ ๊ฒฐํ•ฉ ๊ธˆ์œต ๋ณด์•ˆ ์‹œ์Šคํ…œ"},
            {"name": "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ", "type": "AIField", "description": "๋ณด์ด์Šคํ”ผ์‹ฑ ์ฐจ๋‹จ ๋ฐ ๋””์ง€ํ„ธ ๊ธˆ์œต ์•ˆ์‹ฌ ๊ฑฐ๋ž˜ ์„œ๋น„์Šค ๋ณด์•ˆ ์˜์—ญ"}
        ],
        "relationships": [
            ("ํ† ์Šค๋ฑ…ํฌ", "DEVELOPS", "FDS"),
            ("ํ† ์Šค๋ฑ…ํฌ", "DEVELOPS", "ํ† ์Šค AI FDS"),
            ("FDS", "APPLIES", "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ"),
            ("ํ† ์Šค AI FDS", "USED_IN", "๊ธˆ์œต์‚ฌ๊ธฐ์˜ˆ๋ฐฉ"),
            ("ํ† ์Šค๋ฑ…ํฌ", "PARTNERS_WITH", "์‹ ํ•œ์€ํ–‰")  # cross-domain link; target is declared in ART_GOLD_001
        ]
    },
    {
        "article_id": "ART_GOLD_004",
        "title": "๋„ค์ด๋ฒ„ํŽ˜์ด, ๋งˆ์ด๋ฐ์ดํ„ฐ์™€ ์ดˆ๊ฑฐ๋Œ€ AI ๊ฒฐํ•ฉํ•œ ๊ฐœ์ธ ๋งž์ถคํ˜• '๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ' ์ถœ์‹œ",
        "url": "https://www.thevaluenews.co.kr/news/view.php?idx=198871",
        "source": "๋””์ง€ํ„ธ๋ฐ์ผ๋ฆฌ",
        "author": "์ตœ๋ฐ์ดํ„ฐ ๊ธฐ์ž",
        "published_date": "2026-05-20 14:00",
        "content": (
            "๋„ค์ด๋ฒ„ํŽ˜์ด๊ฐ€ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ธํ”„๋ผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ตญ๋‚ด ์ตœ๊ณ ์˜ ์ดˆ๊ฑฐ๋Œ€ ์–ธ์–ด๋ชจ๋ธ์„ ๊ฒฐํ•ฉํ•œ "
            "์Šค๋งˆํŠธ ์ž์‚ฐ ๋ถ„์„ ์ฑ—๋ด‡ ์„œ๋น„์Šค์ธ '๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ'๋ฅผ ์ •์‹ ์ถœ์‹œํ–ˆ๋‹ค.\n"
            "์ด ํ”Œ๋žซํผ์€ ํฉ์–ด์ง„ ๊ณ ๊ฐ์˜ ์€ํ–‰, ์นด๋“œ์‚ฌ, ์ฆ๊ถŒ์‚ฌ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ •๋ณด๋ฅผ ํ•œ๋ฐ ๋ชจ์€ ๋’ค "
            "๊ฐœ๊ฐœ์ธ์˜ ์†Œ๋น„ ํ˜„ํ™ฉ ๋ถ„์„, ์ง€์ถœ ๋‹ค์ด์–ดํŠธ ๊ฐ€์ด๋“œ, ์ตœ์ ์˜ ๊ธˆ์œต ์ƒํ’ˆ ๊ธˆ๋ฆฌ ๋น„๊ต ํ˜œํƒ์„ ์ œ๊ณตํ•œ๋‹ค.\n"
            "์ดˆ๊ฑฐ๋Œ€ AI ๊ธฐ์ˆ ์ด ์ ‘๋ชฉ๋˜์–ด ๋‹จ์ˆœ ์ˆซ์ž ๋‚˜์—ด์— ๊ทธ์ณค๋˜ ๊ธฐ์กด ๋งˆ์ด๋ฐ์ดํ„ฐ ๋ถ„์„ ํ‹€์„ ๋ฒ—์–ด๋‚˜ "
            "์ ˆ์„ธ ๋น„๋ฒ•์ด๋‚˜ ์ด์ž ์ ˆ์•ฝ ๊ฐ€์ด๋“œ๋ฅผ ์นœ๊ทผํ•œ ๋ฉ”์‹ ์ € ๋Œ€ํ™” ํ˜•ํƒœ๋กœ 24์‹œ๊ฐ„ ์ƒ๋‹ด ๋ธŒ๋ฆฌํ•‘ํ•ด ์ค€๋‹ค.\n"
            "์ด๋กœ์จ ๋„ค์ด๋ฒ„ํŽ˜์ด๋Š” ๊ณ ๋„ํ™”๋œ ์ดˆ์ •๋ฐ€ ๋งˆ์ด๋ฐ์ดํ„ฐ AI ์ž์‚ฐ ์ถ”์ฒœ ํ”Œ๋žซํผ์œผ๋กœ ํ•œ ๋‹จ๊ณ„ ๋„์•ฝํ–ˆ๋‹ค."
        ),
        "entities": [
            {"name": "๋„ค์ด๋ฒ„ํŽ˜์ด", "type": "AICompany", "description": "์ง€์ถœ ๋ถ„์„ ๋ฐ ๊ธˆ์œต ์ถ”์ฒœ ๋“ฑ ๋””์ง€ํ„ธ ๋งˆ์ด๋ฐ์ดํ„ฐ ์ƒํƒœ๊ณ„๋ฅผ ์„ ๋„ํ•˜๋Š” ์ข…ํ•ฉ ๊ธˆ์œต ํ”Œ๋žซํผ"},
            {"name": "๋งˆ์ด๋ฐ์ดํ„ฐ", "type": "AITechnology", "description": "๋ถ„์‚ฐ๋œ ๊ธˆ์œต ๊ธฐ๊ด€ ์ •๋ณด๋ฅผ ํ•œ๋ฐ ๋ชจ์•„ ๊ฐ€์น˜๋ฅผ ๋ถ„์„ํ•˜๋Š” ์ข…ํ•ฉ ๊ธˆ์œต ์ž์‚ฐ ๋ฐ์ดํ„ฐ ๊ธฐ์ˆ "},
            {"name": "๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ", "type": "AIService", "description": "์ดˆ๊ฑฐ๋Œ€ LLM์„ ๋งˆ์ด๋ฐ์ดํ„ฐ์™€ ๊ฒฐํ•ฉํ•˜์—ฌ ๋Œ€ํ™”ํ˜• ์ƒ๋‹ด์„ ์ œ๊ณตํ•˜๋Š” ์ž์‚ฐ ์ปจ์„คํ„ดํŠธ ์„œ๋น„์Šค"},
            {"name": "๋””์ง€ํ„ธ๊ธˆ์œต", "type": "AIField", "description": "ํ•€ํ…Œํฌ ์—ฐ๊ณ„ ๊ฐœ์ธ ์ง€์ถœ ๋‹ค์ด์–ดํŠธ ๋ฐ ๋งž์ถค ์ƒํ’ˆ ๋น„๊ต ์ถ”์ฒœ ํ˜์‹  ์˜์—ญ"}
        ],
        "relationships": [
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "DEVELOPS", "๋งˆ์ด๋ฐ์ดํ„ฐ"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "DEVELOPS", "๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ"),
            ("๋งˆ์ด๋ฐ์ดํ„ฐ", "APPLIES", "๋””์ง€ํ„ธ๊ธˆ์œต"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด AI ๊ธˆ์œต ๋น„์„œ", "USED_IN", "๋””์ง€ํ„ธ๊ธˆ์œต"),
            ("๋„ค์ด๋ฒ„ํŽ˜์ด", "PARTNERS_WITH", "์‹ ํ•œ์€ํ–‰")  # cross-domain link; target is declared in ART_GOLD_001
        ]
    }
]
def _load_article(session, art):
    """Upsert the :Article node for one GOLD_ARTICLES record."""
    session.run(
        """
        MERGE (a:Article {article_id: $article_id})
        SET a.title = $title,
            a.url = $url,
            a.content = $content,
            a.source = $source,
            a.author = $author,
            a.published_date = $published_date,
            a.category = '๊ฒฝ์ œ'
        """,
        {
            "article_id": art["article_id"],
            "title": art["title"],
            "url": art["url"],
            "content": art["content"],
            "source": art["source"],
            "author": art["author"],
            "published_date": art["published_date"],
        },
    )


def _load_chunks(session, art):
    """Embed every non-empty line of the article body and attach it as a :Content node."""
    print(" -> ์‹ค์‹œ๊ฐ„ OpenAI 1536์ฐจ์› ๋ฒกํ„ฐ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ ์ค‘...")
    # One chunk per newline-separated, non-blank piece of the body (the number
    # of chunks therefore depends on how many "\n" the content contains).
    paragraphs = [p.strip() for p in art["content"].split("\n") if p.strip()]
    for chunk_idx, para in enumerate(paragraphs, 1):
        session.run(
            """
            MATCH (a:Article {article_id: $article_id})
            MERGE (c:Content {chunk_id: $chunk_id})
            SET c.chunk = $chunk,
                c.embedding = $embedding,
                c.article_id = $article_id
            MERGE (a)-[:HAS_CHUNK]->(c)
            """,
            {
                "article_id": art["article_id"],
                "chunk_id": f"{art['article_id']}_CHK_{chunk_idx}",
                "chunk": para,
                "embedding": get_embedding(para),
            },
        )


def _load_entities(session, art):
    """Upsert each entity of the article and link it with Article-[:MENTIONS]->Entity."""
    for ent in art["entities"]:
        # The entity type is used as the node label. It is interpolated into
        # the query text (not sent as a parameter) and comes from the
        # GOLD_ARTICLES constant.
        session.run(
            f"""
            MERGE (e:{ent['type']} {{name: $name}})
            SET e.description = $description
            """,
            {"name": ent["name"], "description": ent["description"]},
        )
        session.run(
            f"""
            MATCH (a:Article {{article_id: $article_id}})
            MATCH (e:{ent['type']} {{name: $name}})
            MERGE (a)-[:MENTIONS]->(e)
            """,
            {"article_id": art["article_id"], "name": ent["name"]},
        )
        print(f" - [ENT] ({ent['type']}) {ent['name']} ์™„๋ฃŒ")


def _link_entities(session, art, entity_types):
    """Create the entity-to-entity relationships declared by one article."""
    for src_name, rel_type, tgt_name in art["relationships"]:
        # Names missing from the lookup table default to the AICompany label.
        src_type = entity_types.get(src_name, "AICompany")
        tgt_type = entity_types.get(tgt_name, "AICompany")
        linked = session.run(
            f"""
            MATCH (s:{src_type} {{name: $src_name}})
            MATCH (t:{tgt_type} {{name: $tgt_name}})
            MERGE (s)-[r:{rel_type}]->(t)
            RETURN count(r) AS linked
            """,
            {"src_name": src_name, "tgt_name": tgt_name},
        ).single()["linked"]
        if linked:
            print(f" - [REL] ({src_name})-[:{rel_type}]->({tgt_name}) ์—ฐ๊ฒฐ")
        else:
            # A MATCH that finds no endpoint yields no rows, so nothing was
            # merged; say so instead of reporting a link that does not exist.
            print(
                f" - [WARN] ({src_name})-[:{rel_type}]->({tgt_name}) "
                "skipped: endpoint node not found"
            )


def main():
    """Reset the graph and load GOLD_ARTICLES into Neo4j.

    Steps:
        1. Connect via get_neo4j_driver().
        2. Delete every node and relationship in the database
           (``MATCH (n) DETACH DELETE n``).
        3. For each article: upsert the Article node, its embedded Content
           chunks and its entities (with MENTIONS links).
        4. After all entities of all articles exist, create the entity-to-entity
           relationships. Doing this in a second pass is required because some
           relationships point at entities that are declared by a later article;
           linking inside the article loop silently dropped those edges.
        5. Print relationship statistics.

    The driver is closed even when a step raises; the exception propagates.
    """
    print("[INIT] Neo4j AuraDB ๋“œ๋ผ์ด๋ฒ„ ์ดˆ๊ธฐํ™” ๋ฐ ์—ฐ๊ฒฐ ์‹œ๋„...")
    driver = get_neo4j_driver()
    print("[INIT] [OK] Neo4j ์—ฐ๊ฒฐ ๋ฌด๊ฒฐ์„ฑ ๊ฒ€์ฆ ํ†ต๊ณผ")
    try:
        with driver.session() as session:
            # Start from a clean slate: remove all existing nodes and relationships.
            print("[RESET] ๊ธฐ์กด ๊ทธ๋ž˜ํ”„ ๋ฐ์ดํ„ฐ๋ฅผ ๊นจ๋—ํ•˜๊ฒŒ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค (DETACH DELETE)...")
            session.run("MATCH (n) DETACH DELETE n")
            print("[RESET] [OK] ๊ธฐ์กด ๋ฐ์ดํ„ฐ ์™„์ „ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
            print("[LOAD] 4๋Œ€ ํ•€ํ…Œํฌ ๊ณจ๋“œ ๋‰ด์Šค ๋ฐ์ดํ„ฐ ์ ์žฌ ํ”„๋กœ์„ธ์Šค๋ฅผ ๊ฐ€๋™ํ•ฉ๋‹ˆ๋‹ค...")

            # Name -> label lookup across ALL articles, so relationship
            # endpoints declared in another article resolve to the right label.
            entity_types = {
                e["name"]: e["type"] for a in GOLD_ARTICLES for e in a["entities"]
            }

            # Pass 1: articles, chunks and entities.
            for idx, art in enumerate(GOLD_ARTICLES, 1):
                print(f"\n({idx}/{len(GOLD_ARTICLES)}) [ART] '{art['title'][:35]}...' ์ ์žฌ ์ค‘...")
                _load_article(session, art)
                _load_chunks(session, art)
                _load_entities(session, art)

            # Pass 2: entity-to-entity relationships, now that every endpoint exists.
            for art in GOLD_ARTICLES:
                _link_entities(session, art, entity_types)

            # Relationship-density statistics.
            print("\n[OK] 4๋Œ€ ํ•€ํ…Œํฌ ๊ณจ๋“œ ๋ฐ์ดํ„ฐ ์ ์žฌ ์™„๋ฃŒ!")
            total_rels = session.run("""
                MATCH ()-[r:DEVELOPS|INVESTS_IN|PARTNERS_WITH|APPLIES|USED_IN|RELATED_TO]->()
                RETURN count(r) as cnt
            """).single()["cnt"]
            total_articles = session.run(
                "MATCH (a:Article) RETURN count(a) as cnt"
            ).single()["cnt"]
            avg_density = total_rels / total_articles if total_articles > 0 else 0
            print(f"[STATUS] ํ˜„์žฌ ์ ์žฌ๋œ ์ด ๊ธฐ์‚ฌ ์ˆ˜: {total_articles}๊ฐœ")
            print(f"[STATUS] ์—”ํ‹ฐํ‹ฐ ๊ฐ„ ์ง์ ‘ ๊ด€๊ณ„ ์ด์ˆ˜: {total_rels}๊ฐœ")
            print(f"[STATUS] ๊ธฐ์‚ฌ๋‹น ํ‰๊ท  ๊ด€๊ณ„์ˆ˜: {avg_density:.1f}๊ฐœ (๋ชฉํ‘œ: 3.0๊ฐœ ์ด์ƒ)")
    finally:
        driver.close()
    print("[DONE] ํ”„๋กœ์„ธ์Šค ์ •์ƒ ์ข…๋ฃŒ")
# Run the loader only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()