| |
"""
inject_fintech_gold_data.py — fintech/finance AI gold-data injection script
================================================================
Created: 2026-05-20
Copyright: (c) 2026 FinGraph Team All Rights Reserved.

This script re-themes the chatbot entirely (100%) to the finance/fintech AI
domain. It generates, in real time, financial news articles tailored to four
key scenarios that are guaranteed to work, together with their entities,
chunked data and 1536-dimensional vector embeddings, and loads all of it into
Neo4j AuraDB.
"""
|
|
| import os |
| import sys |
|
|
| import dotenv |
| import neo4j |
| from openai import OpenAI |
|
|
# Pull settings from a local .env file into the process environment before
# any os.getenv() lookup below runs.
dotenv.load_dotenv()

# Switch stdout to UTF-8 when the stream supports it, so the non-ASCII status
# messages printed by this script are encoded consistently.
_stdout_reconfigure = getattr(sys.stdout, "reconfigure", None)
if _stdout_reconfigure is not None:
    _stdout_reconfigure(encoding="utf-8")
|
|
|
|
def _open_verified_driver(uri: str, auth: tuple[str, str]) -> neo4j.Driver:
    """Create a driver for *uri* and return it only once connectivity is verified.

    If verification fails the driver is closed before the error propagates, so
    a failed attempt never leaves a connection pool behind.
    """
    driver = neo4j.GraphDatabase.driver(uri, auth=auth)
    try:
        driver.verify_connectivity()
    except Exception:
        driver.close()
        raise
    return driver


def get_neo4j_driver() -> neo4j.Driver:
    """Build a verified Neo4j driver, preferring client-id/secret credentials.

    Connection settings are read from the environment:

    * ``NEO4J_URI`` (default ``neo4j://localhost:7687``)
    * ``NEO4J_CLIENT_ID`` / ``NEO4J_CLIENT_SECRET`` -- tried first when both
      are set
    * ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` (defaults ``neo4j`` /
      ``password``) -- used when the client credentials are absent or fail

    Returns:
        A driver whose connectivity has already been verified. The caller is
        responsible for closing it.

    Raises:
        Exception: whatever the driver raises when the username/password
            attempt cannot connect or authenticate.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        try:
            return _open_verified_driver(uri, (client_id, client_secret))
        except Exception as exc:
            # Deliberate best-effort: fall through to username/password, but
            # say so instead of hiding why the first attempt failed.
            print(
                "[WARN] Client ID/Secret 인증 실패 - "
                f"사용자명/비밀번호 인증으로 재시도합니다: {exc}"
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    return _open_verified_driver(uri, (username, password))
|
|
|
|
| |
# The OpenAI client is created once at import time and shared by
# get_embedding(); without an API key the script cannot do anything useful,
# so it exits immediately with status 1.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("[FAIL] OPENAI_API_KEY 환경 변수가 누락되었습니다.")
    sys.exit(1)
client = OpenAI(api_key=api_key)
|
|
|
|
def get_embedding(text: str) -> list[float]:
    """Return the ``text-embedding-3-small`` embedding vector for *text*.

    Newlines are replaced by spaces before the request is sent. The original
    note describes the resulting vector as 1536-dimensional.
    """
    single_line = " ".join(text.split("\n"))
    result = client.embeddings.create(
        model="text-embedding-3-small",
        input=[single_line],
    )
    first_item = result.data[0]
    return first_item.embedding
|
|
|
|
| |
| GOLD_ARTICLES = [ |
| { |
| "article_id": "ART_GOLD_001", |
| "title": "์ ํ์ํ, ์์ฑํ AI ํ์ฌ ์ฐจ์ธ๋ ๋ก๋ณด์ด๋๋ฐ์ด์ '์ ํ AI ์ ํฌํธํด๋ฆฌ์ค' ์ ๊ฒฉ ์ถ์", |
| "url": "https://magazine.hankyung.com/business/article/202604165507b", |
| "source": "์ฐํฉ๋ด์ค", |
| "author": "๊น๊ธ์ต ๊ธฐ์", |
| "published_date": "2026-05-20 09:00", |
| "content": ( |
| "์ ํ์ํ์ด ์์ฑํ AI ๊ธฐ์ ์ ๊ฒฐํฉํ์ฌ ๊ฐ์ธ ๋ง์ถคํ ์์ฐ๊ด๋ฆฌ ์๋น์ค๋ฅผ ๋ํญ ๊ฐํํ " |
| "์ฐจ์ธ๋ ๋ก๋ณด์ด๋๋ฐ์ด์ ์๋ฃจ์
'์ ํ AI ์ ํฌํธํด๋ฆฌ์ค'๋ฅผ ๊ณต์ ์ถ์ํ๋ค.\n" |
| "์ด๋ฒ ์๋น์ค๋ ์ค์๊ฐ ๊ธ์ต ์์ฅ ๋น
๋ฐ์ดํฐ์ ๊ณ ๊ฐ์ ํฌ์ ์ฑํฅ์ ๋ค์ฐจ์ ๋ถ์ํ๋ " |
| "AI ๋ฅ๋ฌ๋ ๋ชจ๋ธ์ ๊ธฐ๋ฐ์ผ๋ก ํ๋ฉฐ, ์์ฐ ๋ฐฐ๋ถ ๋น์ค์ ๋์ ์ผ๋ก ์ฌ์กฐ์ (๋ฆฌ๋ฐธ๋ฐ์ฑ)ํด ์ค๋ค.\n" |
| "ํนํ ์ด๊ฑฐ๋ ์ธ์ด๋ชจ๋ธ(LLM)์ด ์ ์ฉ๋์ด ๋ฑ๋ฑํ๊ณ ์ด๋ ค์ด ํฌ์ ๋ณด๊ณ ์๋ฅผ ์์ฐ์ด ํํ์ " |
| "์น์ ํ ์์ฐ ์ข
ํฉ ๋ธ๋ฆฌํ ๋ณด๊ณ ์๋ก ์๋ ์์ฝํ์ฌ ์ ๋ฌํ๋ ํ์ ์ ์ด๋ค๋๋ค.\n" |
| "๊ธ์ต ์๋น์๋ค์ ์ ํ ์ (SOL) ๋ฑ
ํน ์ฑ์ ํตํด ๊ฐํธํ๊ฒ ํฌํธํด๋ฆฌ์ค ์ ์์ ๋ฐ๊ณ " |
| "๋์งํธ ์์ฐ ๊ด๋ฆฌ๋ฅผ ๊ฒฝํํ ์ ์๋ค." |
| ), |
| "entities": [ |
| {"name": "์ ํ์ํ", "type": "AICompany", "description": "์์ฑํ AI ์์ฐ๊ด๋ฆฌ ๋ฐ ๊ธ์ต ํ
ํฌ๋ฅผ ์ ๋ํ๋ ์์ค์ํ"}, |
| {"name": "๋ก๋ณด์ด๋๋ฐ์ด์ ", "type": "AITechnology", "description": "์๊ณ ๋ฆฌ์ฆ ๊ธฐ๋ฐ ๊ฐ์ธ ๋ง์ถคํ ํฌ์ ํฌํธํด๋ฆฌ์ค ๊ตฌ์ฑ ๊ธฐ์ "}, |
| {"name": "์ ํ AI ์ ํฌํธํด๋ฆฌ์ค", "type": "AIService", "description": "์์ฑํ AI ๊ฒฐํฉ ์ฐจ์ธ๋ ๋ชจ๋ฐ์ผ ์์ฐ๊ด๋ฆฌ ์๋ฃจ์
"}, |
| {"name": "์์ฐ๊ด๋ฆฌ", "type": "AIField", "description": "๋์งํธ ๊ธฐ์ ๊ณผ ๋ง์ด๋ฐ์ดํฐ ๊ธฐ๋ฐ์ ๋ง์ถคํ ๊ฐ์ธ ๊ธ์ต ์๋น์ค"} |
| ], |
| "relationships": [ |
| ("์ ํ์ํ", "DEVELOPS", "๋ก๋ณด์ด๋๋ฐ์ด์ "), |
| ("์ ํ์ํ", "DEVELOPS", "์ ํ AI ์ ํฌํธํด๋ฆฌ์ค"), |
| ("๋ก๋ณด์ด๋๋ฐ์ด์ ", "APPLIES", "์์ฐ๊ด๋ฆฌ"), |
| ("์ ํ AI ์ ํฌํธํด๋ฆฌ์ค", "USED_IN", "์์ฐ๊ด๋ฆฌ"), |
| ("์ ํ์ํ", "PARTNERS_WITH", "์นด์นด์คํ์ด") |
| ] |
| }, |
| { |
| "article_id": "ART_GOLD_002", |
| "title": "์นด์นด์คํ์ด, ๋์๋ฐ์ดํฐ ๊ธฐ๋ฐ AI ๋์ถ ์ฌ์ฌ ๋ชจ๋ธ '์นด์นด์คํ์ด AI ์ ์ฉํ๊ฐ' ๊ตฌ์ถ ์๋ฃ", |
| "url": "https://www.newsis.com/view/NISX20260520_0003637276", |
| "source": "ํ๊ตญ๊ฒฝ์ ", |
| "author": "์ดํ์ด ๊ธฐ์", |
| "published_date": "2026-05-20 10:15", |
| "content": ( |
| "์นด์นด์คํ์ด๊ฐ ๋น
๋ฐ์ดํฐ์ ๋จธ์ ๋ฌ๋/๋ฅ๋ฌ๋์ ์ตํฉํ์ฌ ํ์ ์ ์ธ AI ๋์์ ์ฉํ๊ฐ ์์คํ
์ธ " |
| "'์นด์นด์คํ์ด AI ์ ์ฉํ๊ฐ' ์๋ฃจ์
์ ๊ฐ๋ฐ ๋ฐ ๊ตฌ์ถ์ ์๋ฃํ๊ณ ํ์ฅ์ ์ ์ฉํ๋ค.\n" |
| "์ด ์์คํ
์ ๊ธฐ์กด ์ ์ฉํ๊ฐ์ฌ(CB)์ ์ด๋ ฅ ์ค์ฌ ํ๊ฐ ๋ชจ๋ธ์์ ์์ธ๋์๋ ์ฒญ๋
์ธต๊ณผ " |
| "๊ธ์ต์ด๋ ฅ ๋ถ์กฑ์(์ฌํ์ผ๋ฌ)๋ค์ ์ํด ์นด์นด์คํ์ด ํ๋ซํผ ๋ด ๊ฒฐ์ ํจํด, ์ก๊ธ ๋ฐ ์ง์ถ ์ฑํฅ, " |
| "ํ์ด๋จธ๋ ์์ก ๊ด๋ฆฌ ์ถ์ด ๋ฑ ๋น๊ธ์ต ๋์ ๋ฐ์ดํฐ๋ฅผ ์ ๊ตํ ๋ฅ๋ฌ๋๋ง์ผ๋ก ๊ต์ฐจ ๋ถ์ํ๋ค.\n" |
| "AI ๋์ถ ์ฌ์ฌ ๋์
์ ํตํด ์ฌํ์ผ๋ฌ๋ค์ ๋์ถ ์น์ธ ์ฅ๋ฒฝ์ 30% ์ด์ ๋ฎ์ถ๋ ํํธ, " |
| "AI์ ์ ํํ ๋ฆฌ์คํฌ ํ๋กํ์ผ๋ง ๊ธฐ์ ์ ํ์ฉํด ์ฐ์ฒด ๋ฐ ๊ธ์ต ๋ถ์ค๋ฅ ์ ํฌ๊ฒ ์ต์ ํ๋ ํจ๊ณผ๋ฅผ ์ฆ๋ช
ํ๋ค." |
| ), |
| "entities": [ |
| {"name": "์นด์นด์คํ์ด", "type": "AICompany", "description": "๋์ ๋์ถ ์ฌ์ฌ ๋ฐ ํํ
ํฌ ํ์ ์ ์ด๋๋ ์ข
ํฉ ๋ชจ๋ฐ์ผ ๊ฒฐ์ ํ๋ซํผ"}, |
| {"name": "๋์์ ์ฉํ๊ฐ", "type": "AITechnology", "description": "๋น๊ธ์ต ๋์ ๋ฐ์ดํฐ๋ฅผ ๋ฅ๋ฌ๋์ผ๋ก ํ์ตํ์ฌ ์ ์ฉ๋๋ฅผ ์ธก์ ํ๋ ์ฐจ์ธ๋ ์ ์ฉํ๊ฐ ๊ธฐ์ "}, |
| {"name": "์นด์นด์คํ์ด AI ์ ์ฉํ๊ฐ", "type": "AIService", "description": "์ฌํ์ผ๋ฌ๋ฅผ ์ํ ๋ฅ๋ฌ๋ ๊ธฐ๋ฐ ๋์ ๋์ถ ์ฌ์ฌ ๊ณ ๋ํ ์๋ฃจ์
"}, |
| {"name": "๋์ถ์ฌ์ฌ", "type": "AIField", "description": "๋ฆฌ์คํฌ ํ๋กํ์ผ๋ง ๋ฐ ํํ
ํฌ ํ๋ซํผ ์ฐ๊ณ ๊ธ์ต ์น์ธ ํ๋ก์ธ์ค"} |
| ], |
| "relationships": [ |
| ("์นด์นด์คํ์ด", "DEVELOPS", "๋์์ ์ฉํ๊ฐ"), |
| ("์นด์นด์คํ์ด", "DEVELOPS", "์นด์นด์คํ์ด AI ์ ์ฉํ๊ฐ"), |
| ("๋์์ ์ฉํ๊ฐ", "APPLIES", "๋์ถ์ฌ์ฌ"), |
| ("์นด์นด์คํ์ด AI ์ ์ฉํ๊ฐ", "USED_IN", "๋์ถ์ฌ์ฌ"), |
| ("์นด์นด์คํ์ด", "PARTNERS_WITH", "ํ ์ค๋ฑ
ํฌ") |
| ] |
| }, |
| { |
| "article_id": "ART_GOLD_003", |
| "title": "ํ ์ค๋ฑ
ํฌ, ์์ฑํ AI ๊ฒฐํฉํ ๋ณด์ด์คํผ์ฑ ์ค์๊ฐ ํ์ง ์์คํ
'ํ ์ค AI FDS'๋ก ๊ธ์ต ์ฌ๊ธฐ ์์ฒ ์ฐจ๋จ", |
| "url": "https://www.dt.co.kr/article/12057506", |
| "source": "๋งค์ผ๊ฒฝ์ ", |
| "author": "๋ฐํ ์ค ๊ธฐ์", |
| "published_date": "2026-05-20 11:30", |
| "content": ( |
| "ํ ์ค๋ฑ
ํฌ๊ฐ ๊ธ์ต๊ถ ์ต์ด๋ก ์ด์๊ธ์ต๊ฑฐ๋ํ์ง์์คํ
(FDS)์ ์์ฑํ AI ์์ง์ ์ฅ์ฐฉํ " |
| "'ํ ์ค AI FDS'๋ฅผ ์ฑ๊ณต์ ์ผ๋ก ๋ฐ์นญํ์ฌ ๋ณด์ด์คํผ์ฑ ๋ฐ ์ค๋งํธ ํผ์ฑ์ ์์ฒ ์ฐจ๋จํ๊ณ ์๋ค.\n" |
| "์ด ์์คํ
์ ์ค์๊ฐ์ผ๋ก ๊ณ ์ ์ ์
๋๋ ๋น๋๋ฉด ๊ณ์ข ์ด์ฒด ๋ฐ ์๊ฒฉ ์ ์ด ์ฑ ๊ตฌ๋ ๊ฑฐ๋ ๋ด์ญ์ " |
| "์ด๊ณ ์ ๋ถ์ํ์ฌ ๊ธ์ต์ฌ๊ธฐ ์งํ๋ฅผ ์ค์๊ฐ ํ์งํด ๋ธ๋ค.\n" |
| "ํผ์ฑ ์์ฌ ๊ฑฐ๋๊ฐ ๋ฐ์ํ๋ฉด AI ์์ง์ด ์ฆ์ ํด๋น ๊ณ์ข์ ์ด์ฒด๋ฅผ 0.1์ด ๋ด๋ก ๋๊ฒฐ ์กฐ์นํ๊ณ , " |
| "ํผํด์์๊ฒ ์ค์๊ฐ ๊ธด๊ธ ๊ฒฝ๊ณ ๋ฉ์์ง์ ๊ฐ์ด๋ ์์ฑ์ ์์ฑํ AI๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ๋ฐ์กํ๋ค.\n" |
| "์ด๋ฅผ ํตํด ํ ์ค๋ฑ
ํฌ๋ ์ทจ์ฝ๊ณ์ธต์ ๋์งํธ ๋ณด์ด์คํผ์ฑ ํผํด ๋ฐ์ ๊ฑด์๋ฅผ ์๋
๋๋น " |
| "70% ์ด์ ํ๊ธฐ์ ์ผ๋ก ๋ฎ์ถ๋ ์ฌํ์ ํ๊ธ ํจ๊ณผ๋ฅผ ๊ฑฐ๋์๋ค." |
| ), |
| "entities": [ |
| {"name": "ํ ์ค๋ฑ
ํฌ", "type": "AICompany", "description": "๋์งํธ ๊ธ์ต์ ์ฅ๋ฒฝ์ ๋ฎ์ถ๊ณ ๊ฐ๋ ฅํ FDS ์๋ฐฉ์ฑ
์ ์ ๊ณตํ๋ ๋ชจ๋ฐ์ผ ์ธํฐ๋ท์ ๋ฌธ์ํ"}, |
| {"name": "FDS", "type": "AITechnology", "description": "์ค์๊ฐ ๊ฑฐ๋ ํจํด์ ๋น์ ์ ์ ๋ฌด๋ฅผ AI๋ก ํ์งํ๋ ์ด์๊ธ์ต๊ฑฐ๋ํ์ง ๊ธฐ์ "}, |
| {"name": "ํ ์ค AI FDS", "type": "AIService", "description": "์์ฑํ AI ๊ธฐ๋ฐ ๋ณด์ด์คํผ์ฑ ๋ฐ ์๊ฒฉ์ ์ด ์ฐจ๋จ ๊ฒฐํฉ ๊ธ์ต ๋ณด์ ์์คํ
"}, |
| {"name": "๊ธ์ต์ฌ๊ธฐ์๋ฐฉ", "type": "AIField", "description": "๋ณด์ด์คํผ์ฑ ์ฐจ๋จ ๋ฐ ๋์งํธ ๊ธ์ต ์์ฌ ๊ฑฐ๋ ์๋น์ค ๋ณด์ ์์ญ"} |
| ], |
| "relationships": [ |
| ("ํ ์ค๋ฑ
ํฌ", "DEVELOPS", "FDS"), |
| ("ํ ์ค๋ฑ
ํฌ", "DEVELOPS", "ํ ์ค AI FDS"), |
| ("FDS", "APPLIES", "๊ธ์ต์ฌ๊ธฐ์๋ฐฉ"), |
| ("ํ ์ค AI FDS", "USED_IN", "๊ธ์ต์ฌ๊ธฐ์๋ฐฉ"), |
| ("ํ ์ค๋ฑ
ํฌ", "PARTNERS_WITH", "์ ํ์ํ") |
| ] |
| }, |
| { |
| "article_id": "ART_GOLD_004", |
| "title": "๋ค์ด๋ฒํ์ด, ๋ง์ด๋ฐ์ดํฐ์ ์ด๊ฑฐ๋ AI ๊ฒฐํฉํ ๊ฐ์ธ ๋ง์ถคํ '๋ค์ด๋ฒํ์ด AI ๊ธ์ต ๋น์' ์ถ์", |
| "url": "https://www.thevaluenews.co.kr/news/view.php?idx=198871", |
| "source": "๋์งํธ๋ฐ์ผ๋ฆฌ", |
| "author": "์ต๋ฐ์ดํฐ ๊ธฐ์", |
| "published_date": "2026-05-20 14:00", |
| "content": ( |
| "๋ค์ด๋ฒํ์ด๊ฐ ๋ง์ด๋ฐ์ดํฐ ์ธํ๋ผ๋ฅผ ๋ฐํ์ผ๋ก ๊ตญ๋ด ์ต๊ณ ์ ์ด๊ฑฐ๋ ์ธ์ด๋ชจ๋ธ์ ๊ฒฐํฉํ " |
| "์ค๋งํธ ์์ฐ ๋ถ์ ์ฑ๋ด ์๋น์ค์ธ '๋ค์ด๋ฒํ์ด AI ๊ธ์ต ๋น์'๋ฅผ ์ ์ ์ถ์ํ๋ค.\n" |
| "์ด ํ๋ซํผ์ ํฉ์ด์ง ๊ณ ๊ฐ์ ์ํ, ์นด๋์ฌ, ์ฆ๊ถ์ฌ ๋ง์ด๋ฐ์ดํฐ ์ ๋ณด๋ฅผ ํ๋ฐ ๋ชจ์ ๋ค " |
| "๊ฐ๊ฐ์ธ์ ์๋น ํํฉ ๋ถ์, ์ง์ถ ๋ค์ด์ดํธ ๊ฐ์ด๋, ์ต์ ์ ๊ธ์ต ์ํ ๊ธ๋ฆฌ ๋น๊ต ํํ์ ์ ๊ณตํ๋ค.\n" |
| "์ด๊ฑฐ๋ AI ๊ธฐ์ ์ด ์ ๋ชฉ๋์ด ๋จ์ ์ซ์ ๋์ด์ ๊ทธ์ณค๋ ๊ธฐ์กด ๋ง์ด๋ฐ์ดํฐ ๋ถ์ ํ์ ๋ฒ์ด๋ " |
| "์ ์ธ ๋น๋ฒ์ด๋ ์ด์ ์ ์ฝ ๊ฐ์ด๋๋ฅผ ์น๊ทผํ ๋ฉ์ ์ ๋ํ ํํ๋ก 24์๊ฐ ์๋ด ๋ธ๋ฆฌํํด ์ค๋ค.\n" |
| "์ด๋ก์จ ๋ค์ด๋ฒํ์ด๋ ๊ณ ๋ํ๋ ์ด์ ๋ฐ ๋ง์ด๋ฐ์ดํฐ AI ์์ฐ ์ถ์ฒ ํ๋ซํผ์ผ๋ก ํ ๋จ๊ณ ๋์ฝํ๋ค." |
| ), |
| "entities": [ |
| {"name": "๋ค์ด๋ฒํ์ด", "type": "AICompany", "description": "์ง์ถ ๋ถ์ ๋ฐ ๊ธ์ต ์ถ์ฒ ๋ฑ ๋์งํธ ๋ง์ด๋ฐ์ดํฐ ์ํ๊ณ๋ฅผ ์ ๋ํ๋ ์ข
ํฉ ๊ธ์ต ํ๋ซํผ"}, |
| {"name": "๋ง์ด๋ฐ์ดํฐ", "type": "AITechnology", "description": "๋ถ์ฐ๋ ๊ธ์ต ๊ธฐ๊ด ์ ๋ณด๋ฅผ ํ๋ฐ ๋ชจ์ ๊ฐ์น๋ฅผ ๋ถ์ํ๋ ์ข
ํฉ ๊ธ์ต ์์ฐ ๋ฐ์ดํฐ ๊ธฐ์ "}, |
| {"name": "๋ค์ด๋ฒํ์ด AI ๊ธ์ต ๋น์", "type": "AIService", "description": "์ด๊ฑฐ๋ LLM์ ๋ง์ด๋ฐ์ดํฐ์ ๊ฒฐํฉํ์ฌ ๋ํํ ์๋ด์ ์ ๊ณตํ๋ ์์ฐ ์ปจ์คํดํธ ์๋น์ค"}, |
| {"name": "๋์งํธ๊ธ์ต", "type": "AIField", "description": "ํํ
ํฌ ์ฐ๊ณ ๊ฐ์ธ ์ง์ถ ๋ค์ด์ดํธ ๋ฐ ๋ง์ถค ์ํ ๋น๊ต ์ถ์ฒ ํ์ ์์ญ"} |
| ], |
| "relationships": [ |
| ("๋ค์ด๋ฒํ์ด", "DEVELOPS", "๋ง์ด๋ฐ์ดํฐ"), |
| ("๋ค์ด๋ฒํ์ด", "DEVELOPS", "๋ค์ด๋ฒํ์ด AI ๊ธ์ต ๋น์"), |
| ("๋ง์ด๋ฐ์ดํฐ", "APPLIES", "๋์งํธ๊ธ์ต"), |
| ("๋ค์ด๋ฒํ์ด AI ๊ธ์ต ๋น์", "USED_IN", "๋์งํธ๊ธ์ต"), |
| ("๋ค์ด๋ฒํ์ด", "PARTNERS_WITH", "์ ํ์ํ") |
| ] |
| } |
| ] |
|
|
|
|
def _load_article(session, art):
    """Upsert one article, its embedded paragraph chunks and its entities.

    Entity-to-entity relationships are deliberately not created here: they may
    point at entities that only a later article introduces, so they are linked
    in a separate pass once every node exists.
    """
    article_id = art["article_id"]

    session.run(
        """
        MERGE (a:Article {article_id: $article_id})
        SET a.title = $title,
            a.url = $url,
            a.content = $content,
            a.source = $source,
            a.author = $author,
            a.published_date = $published_date,
            a.category = '경제'
        """,
        {
            "article_id": article_id,
            "title": art["title"],
            "url": art["url"],
            "content": art["content"],
            "source": art["source"],
            "author": art["author"],
            "published_date": art["published_date"],
        },
    )

    print(" -> 실시간 OpenAI 1536차원 벡터 임베딩 생성 중...")

    # One chunk per non-empty line of the article body.
    paragraphs = [p.strip() for p in art["content"].split("\n") if p.strip()]
    for chunk_idx, para in enumerate(paragraphs, 1):
        session.run(
            """
            MATCH (a:Article {article_id: $article_id})
            MERGE (c:Content {chunk_id: $chunk_id})
            SET c.chunk = $chunk,
                c.embedding = $embedding,
                c.article_id = $article_id
            MERGE (a)-[:HAS_CHUNK]->(c)
            """,
            {
                "article_id": article_id,
                "chunk_id": f"{article_id}_CHK_{chunk_idx}",
                "chunk": para,
                "embedding": get_embedding(para),
            },
        )

    for ent in art["entities"]:
        # Labels cannot be passed as Cypher parameters, so the type is
        # interpolated; it comes from the in-file GOLD_ARTICLES constant.
        session.run(
            f"""
            MATCH (a:Article {{article_id: $article_id}})
            MERGE (e:{ent['type']} {{name: $name}})
            SET e.description = $description
            MERGE (a)-[:MENTIONS]->(e)
            """,
            {
                "article_id": article_id,
                "name": ent["name"],
                "description": ent["description"],
            },
        )
        print(f" - [ENT] ({ent['type']}) {ent['name']} 완료")


def _link_entities(session, art, entity_types):
    """Create the entity-to-entity relationships declared by one article.

    Must run after every article's entities have been loaded. A relationship
    whose endpoints cannot be found is reported rather than silently skipped.
    """
    for src_name, rel_type, tgt_name in art["relationships"]:
        # Names with no known type are assumed to be companies.
        src_type = entity_types.get(src_name, "AICompany")
        tgt_type = entity_types.get(tgt_name, "AICompany")

        record = session.run(
            f"""
            MATCH (s:{src_type} {{name: $src_name}})
            MATCH (t:{tgt_type} {{name: $tgt_name}})
            MERGE (s)-[:{rel_type}]->(t)
            RETURN count(*) AS cnt
            """,
            {"src_name": src_name, "tgt_name": tgt_name},
        ).single()

        # count(*) is 0 when either MATCH found no node, i.e. nothing was linked.
        if record["cnt"]:
            print(f" - [REL] ({src_name})-[:{rel_type}]->({tgt_name}) 연결")
        else:
            print(
                f" - [WARN] ({src_name})-[:{rel_type}]->({tgt_name}) "
                "대상 노드를 찾지 못해 건너뜀"
            )


def _print_summary(session):
    """Print article count, entity-relationship count and their ratio."""
    total_rels = session.run(
        """
        MATCH ()-[r:DEVELOPS|INVESTS_IN|PARTNERS_WITH|APPLIES|USED_IN|RELATED_TO]->()
        RETURN count(r) as cnt
        """
    ).single()["cnt"]
    total_articles = session.run(
        "MATCH (a:Article) RETURN count(a) as cnt"
    ).single()["cnt"]
    avg_density = total_rels / total_articles if total_articles > 0 else 0

    print(f"[STATUS] 현재 적재된 총 기사 수: {total_articles}개")
    print(f"[STATUS] 엔티티 간 직접 관계 총수: {total_rels}개")
    print(f"[STATUS] 기사당 평균 관계수: {avg_density:.1f}개 (목표: 3.0개 이상)")


def main():
    """Wipe the graph and load GOLD_ARTICLES into Neo4j.

    WARNING: this deletes every node and relationship in the target database
    (``MATCH (n) DETACH DELETE n``) before loading.

    Steps:
        1. Connect and verify the Neo4j driver.
        2. Delete all existing graph data.
        3. For each article, upsert the Article node, its embedded Content
           chunks and the entities it mentions.
        4. In a second pass, create entity-to-entity relationships, so that a
           relationship may reference an entity introduced by a later article.
        5. Print summary statistics.

    The driver is closed even when loading fails part-way.
    """
    print("[INIT] Neo4j AuraDB 드라이버 초기화 및 연결 시도...")
    driver = get_neo4j_driver()
    print("[INIT] [OK] Neo4j 연결 무결성 검증 통과")

    try:
        with driver.session() as session:
            print("[RESET] 기존 그래프 데이터를 깨끗하게 초기화합니다 (DETACH DELETE)...")
            session.run("MATCH (n) DETACH DELETE n")
            print("[RESET] [OK] 기존 데이터 완전 초기화 완료")

            print("[LOAD] 4대 핀테크 골드 뉴스 데이터 적재 프로세스를 가동합니다...")

            # Entity name -> label, gathered across all articles so that a
            # relationship can resolve the type of an entity defined elsewhere.
            entity_types = {
                ent["name"]: ent["type"]
                for article in GOLD_ARTICLES
                for ent in article["entities"]
            }

            # Pass 1: nodes (articles, chunks, entities).
            for idx, art in enumerate(GOLD_ARTICLES, 1):
                print(
                    f"\n({idx}/{len(GOLD_ARTICLES)}) [ART] "
                    f"'{art['title'][:35]}...' 적재 중..."
                )
                _load_article(session, art)

            # Pass 2: relationships, now that every endpoint exists.
            print("\n[LINK] 엔티티 간 관계 연결 중...")
            for art in GOLD_ARTICLES:
                _link_entities(session, art, entity_types)

            print("\n[OK] 4대 핀테크 골드 데이터 적재 완료!")
            _print_summary(session)
    finally:
        driver.close()

    print("[DONE] 프로세스 정상 종료")


if __name__ == "__main__":
    main()
|
|