File size: 22,925 Bytes
cb92864
 
 
 
 
 
 
 
 
 
 
 
622f700
cb92864
79ef842
 
08fb91a
622f700
 
 
 
cb92864
 
 
08fb91a
 
cb92864
 
 
08fb91a
cb92864
 
 
 
7f57ffc
79ef842
 
 
 
 
e7d3bfe
79ef842
 
 
7f57ffc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9675b2d
 
 
7f57ffc
 
cb92864
 
 
9675b2d
cb92864
 
 
ff0395c
cb92864
 
 
 
3e23aae
 
 
 
 
 
 
 
 
 
 
 
 
cb92864
ff0395c
cb92864
 
 
 
 
 
3e23aae
 
 
cb92864
 
08fb91a
9675b2d
cb92864
 
 
 
 
 
 
08fb91a
cb92864
 
 
 
 
 
 
 
 
 
 
47e7138
cb92864
47e7138
622f700
 
47e7138
cb92864
47e7138
622f700
 
47e7138
cb92864
64ad66f
47e7138
622f700
 
64ad66f
 
 
 
622f700
 
64ad66f
 
 
 
622f700
 
64ad66f
47e7138
ff0395c
 
622f700
ff0395c
 
47e7138
3e23aae
 
 
 
 
 
 
47e7138
3e23aae
 
 
 
 
 
 
47e7138
3e23aae
 
47e7138
3e23aae
 
 
 
 
cb92864
 
 
 
 
 
1ecde19
64ad66f
 
 
1ecde19
64ad66f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f57ffc
64ad66f
 
 
0b09cae
 
 
64ad66f
 
7f57ffc
64ad66f
09de128
 
64ad66f
495d5e7
09de128
495d5e7
 
9731058
09de128
 
495d5e7
9731058
495d5e7
9731058
495d5e7
09de128
495d5e7
9731058
 
495d5e7
9731058
495d5e7
09de128
495d5e7
9731058
3e23aae
 
 
9731058
495d5e7
9731058
 
495d5e7
09de128
495d5e7
9731058
495d5e7
9731058
495d5e7
 
 
3e23aae
495d5e7
3e23aae
 
 
 
 
09de128
 
cb92864
 
 
64ad66f
cb92864
 
 
64ad66f
cb92864
 
9675b2d
 
 
79ef842
9675b2d
6f726d8
79ef842
 
9675b2d
 
 
 
 
eb29e6d
e7d3bfe
eb29e6d
79ef842
9675b2d
 
 
 
 
 
 
 
 
 
 
79ef842
9675b2d
 
 
 
 
 
79ef842
9675b2d
 
09de128
3e23aae
 
 
 
 
9675b2d
 
 
3e23aae
 
 
 
 
 
9675b2d
 
 
79ef842
 
9675b2d
 
 
79ef842
9675b2d
79ef842
 
9675b2d
 
 
79ef842
e7d3bfe
79ef842
 
 
 
 
 
 
 
 
 
e7d3bfe
79ef842
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7d3bfe
79ef842
 
 
 
e7d3bfe
79ef842
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7d3bfe
79ef842
 
 
9675b2d
 
fd7f235
9675b2d
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
"""
finRetrieval.py — GraphRAG retrieval module
===========================================
Imported by app.py and wired into the Gradio chatbot.

Usage:
    from src.retrieval.finRetrieval import graphrag

    # Query: "What are Samsung Electronics' AI services?"
    response = graphrag.search(query_text="삼성전자 AI 서비스는?")
    print(response.answer)
"""

import logging
import os
from dataclasses import dataclass
from typing import Any

# Silence Neo4j DBMS server warnings (e.g. the deprecated vector queryNodes notice) in the logs
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j.notifications").setLevel(logging.ERROR)

import dotenv
import neo4j
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.generation import GraphRAG, RagTemplate
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.retrievers import (
    Text2CypherRetriever,
    ToolsRetriever,
    VectorCypherRetriever,
)

dotenv.load_dotenv()


@dataclass
class HybridResult:
    """Unified answer payload for the graph-backed and the general-knowledge path.

    Attributes:
        answer: Final answer text.
        mode: ``"graph"`` for an answer based on graph retrieval,
            ``"general"`` for an answer based on GPT-4o-mini general knowledge.
        retriever_result: The RetrieverResult of the search; only meaningful
            when ``mode`` is ``"graph"``.
    """

    answer: str
    mode: str
    retriever_result: Any = None


def get_neo4j_driver() -> neo4j.Driver:
    """Create a Neo4j driver and verify that it can reach the server.

    Connection settings are read from environment variables:

    * ``NEO4J_URI`` (default ``neo4j://localhost:7687``)
    * ``NEO4J_CLIENT_ID`` / ``NEO4J_CLIENT_SECRET`` - tried first when both
      are set
    * ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` (defaults ``neo4j`` /
      ``password``) - used when no client credentials are set or when the
      client-credential attempt fails

    Returns:
        A driver whose connectivity has been verified.

    Raises:
        Exception: Whatever the driver raises when the username/password
            driver cannot be created or verified. A driver that failed
            verification is closed before the error propagates.
    """
    log = logging.getLogger(__name__)
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")

    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")
    if client_id and client_secret:
        candidate = None
        try:
            candidate = neo4j.GraphDatabase.driver(uri, auth=(client_id, client_secret))
            candidate.verify_connectivity()
        except Exception as exc:
            # Best-effort attempt: release the unusable driver instead of
            # leaking it, leave a trace of the failure, then fall through to
            # username/password authentication.
            if candidate is not None:
                candidate.close()
            log.warning(
                "Neo4j client-credential login failed (%s); falling back to username/password.",
                type(exc).__name__,
            )
        else:
            return candidate

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    driver = neo4j.GraphDatabase.driver(uri, auth=(username, password))
    try:
        driver.verify_connectivity()
    except Exception:
        # Do not hand the caller an exception *and* an open driver nobody owns.
        driver.close()
        raise
    return driver


INDEX_NAME = "content_vector_index"

# ------------------------------------------
# 2. Retriever-related constants and settings
# ------------------------------------------

_retrieval_query = """
MATCH (node)<-[:HAS_CHUNK]-(article:Article)
OPTIONAL MATCH (article)-[:MENTIONS]->(company:AICompany)
OPTIONAL MATCH (company)-[:DEVELOPS]->(tech:AITechnology)
OPTIONAL MATCH (company)-[:DEVELOPS]->(svc:AIService)
OPTIONAL MATCH (article)-[:MENTIONS]->(field:AIField)

// 동일 κΈ°μ—…/기술/μ„œλΉ„μŠ€λ₯Ό μ–ΈκΈ‰ν•˜λŠ” κ΄€λ ¨ κΈ°μ‚¬κΉŒμ§€ ν™•μž₯ 탐색 (νš‘λ‹¨ 검색)
OPTIONAL MATCH (related_article:Article)
WHERE related_article <> article
  AND (
    EXISTS { (related_article)-[:MENTIONS]->(:AICompany)<-[:MENTIONS]-(article) }
    OR EXISTS { (related_article)-[:MENTIONS]->(:AITechnology)<-[:MENTIONS]-(article) }
    OR EXISTS { (related_article)-[:MENTIONS]->(:AIService)<-[:MENTIONS]-(article) }
  )
WITH
    node, article, company, tech, svc, field,
    collect(DISTINCT related_article.title)[..3] AS related_titles,
    collect(DISTINCT related_article.url)[..3]   AS related_urls
RETURN
    node.chunk             AS chunk,
    article.title          AS article_title,
    article.url            AS article_url,
    article.published_date AS article_date,
    collect(DISTINCT company.name) AS companies,
    collect(DISTINCT tech.name)    AS technologies,
    collect(DISTINCT svc.name)     AS services,
    collect(DISTINCT field.name)   AS fields,
    related_titles         AS related_article_titles,
    related_urls           AS related_article_urls
"""


def _get_schema(driver: neo4j.Driver) -> str:
    """Render node properties and relationship patterns of the database as text.

    Two queries run inside one session: the first lists the property names of
    every node type, the second collects up to 30 distinct
    (source label, relationship type, target label) triples. Each row becomes
    one bullet line under a node heading and a relationship heading.

    Args:
        driver: Object whose ``session()`` yields a context-managed session
            with a ``run(query).data()`` API.

    Returns:
        The schema description as a single string.
    """
    node_query = (
        "CALL db.schema.nodeTypeProperties() "
        "YIELD nodeType, propertyName "
        "RETURN nodeType, collect(propertyName) as props"
    )
    rel_query = "MATCH (n)-[r]->(m) RETURN DISTINCT labels(n)[0] as src, type(r) as rel, labels(m)[0] as tgt LIMIT 30"

    with driver.session() as session:
        node_rows = session.run(node_query).data()
        rel_rows = session.run(rel_query).data()

    node_lines = [f"- {row['nodeType']}: {row['props']}\n" for row in node_rows]
    rel_lines = [f"- ({row['src']})-[:{row['rel']}]->({row['tgt']})\n" for row in rel_rows]
    return "".join(
        ["=== Neo4j Schema ===\nλ…Έλ“œ:\n", *node_lines, "\n관계:\n", *rel_lines]
    )


_examples = [
    """USER INPUT: 카카였페이의 AI μ„œλΉ„μŠ€ λͺ©λ‘μ„ μ•Œλ €μ£Όμ„Έμš”
CYPHER QUERY:
    MATCH (c:AICompany {name:"카카였페이"})-[:DEVELOPS]->(s:AIService)
    OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
    RETURN s.name AS name, s.description AS description, a.title AS article_title, a.url AS article_url""",
    """USER INPUT: μ‹ ν•œμ€ν–‰μ΄ 개발 쀑인 AI κΈ°μˆ μ€?
CYPHER QUERY:
    MATCH (c:AICompany {name:"μ‹ ν•œμ€ν–‰"})-[:DEVELOPS]->(t:AITechnology)
    OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
    RETURN t.name AS name, t.description AS description, a.title AS article_title, a.url AS article_url""",
    """USER INPUT: μ–΄λ–€ κΈˆμœ΅μ‚¬κ°€ λ‘œλ³΄μ–΄λ“œλ°”μ΄μ € κΈ°μˆ μ„ κ°œλ°œν•˜λ‚˜μš”?
CYPHER QUERY:
    MATCH (c:AICompany)-[:DEVELOPS]->(t:AITechnology)
    WHERE t.name CONTAINS "λ‘œλ³΄μ–΄λ“œλ°”μ΄μ €" OR t.name CONTAINS "μ•Œκ³ λ¦¬μ¦˜"
    OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
    RETURN c.name AS company_name, t.name AS tech_name, a.title AS article_title, a.url AS article_url""",
    """USER INPUT: κΈˆμœ΅μ΄λ‚˜ ν•€ν…Œν¬ 뢄야에 κΈ°μˆ μ„ μ μš©ν•˜κ³  μžˆλŠ” 기업듀은 μ–΄λ””μ•Ό?
CYPHER QUERY:
    MATCH (c:AICompany)-[:DEVELOPS]->(t)-[:USED_IN]->(f:AIField)
    WHERE f.name CONTAINS "금육" OR f.name CONTAINS "ν•€ν…Œν¬"
    OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
    RETURN DISTINCT c.name AS company_name, t.name AS tech_name, f.name AS field_name, a.title AS article_title, a.url AS article_url""",
    """USER INPUT: 금육AI 뢄야에 κ°€μž₯ 적극적인 κΈ°μ—… TOP 3와 λŒ€ν‘œ μ„œλΉ„μŠ€
CYPHER QUERY:
    MATCH (c:AICompany)-[:DEVELOPS]->(s)-[:USED_IN]->(f:AIField)
    WHERE f.name CONTAINS "금육" OR f.name CONTAINS "ν•€ν…Œν¬"
    OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
    RETURN DISTINCT c.name AS company_name, s.name AS service_name, f.name AS field_name, a.title AS article_title, a.url AS article_url
    LIMIT 3""",
    """USER INPUT: 졜근 금육 AI κ΄€λ ¨ λ‰΄μŠ€ 기사λ₯Ό μš”μ•½ν•΄μ€˜
CYPHER QUERY:
    MATCH (a:Article)-[:HAS_CHUNK]->(c:Content)
    RETURN a.title AS title, a.url AS url, a.published_date AS published_date, c.chunk AS chunk
    ORDER BY a.published_date DESC
    LIMIT 3""",
    """USER INPUT: 졜근 κ°€μž₯ 관심이 높은 금육 AI 기술이 뭐야?
CYPHER QUERY:
    MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
    OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
    WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..3] AS article_titles, collect(DISTINCT a.url)[..3] AS article_urls
    ORDER BY article_count DESC
    RETURN t.name AS tech_name, t.description AS description, article_count, companies, article_titles, article_urls
    LIMIT 5""",
    """USER INPUT: 금육 AI 기술 νŠΈλ Œλ“œλ₯Ό λΆ„μ„ν•΄μ€˜
CYPHER QUERY:
    MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
    OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
    WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..2] AS article_titles, collect(DISTINCT a.url)[..2] AS article_urls
    ORDER BY article_count DESC
    RETURN t.name AS tech_name, article_count, companies, article_titles, article_urls
    LIMIT 5""",
    """USER INPUT: ν† μŠ€ λ˜λŠ” 카카였페이 κ΄€λ ¨ 금육 AI λ‰΄μŠ€ μ•Œλ €μ€˜
CYPHER QUERY:
    MATCH (a:Article)-[:MENTIONS]->(c:AICompany)
    WHERE c.name CONTAINS 'ν† μŠ€' OR c.name CONTAINS '카카였페이'
    OPTIONAL MATCH (a)-[:MENTIONS]->(t:AITechnology)
    OPTIONAL MATCH (a)-[:MENTIONS]->(s:AIService)
    RETURN a.title AS article_title, a.url AS article_url, a.published_date AS article_date,
           collect(DISTINCT c.name) AS companies, collect(DISTINCT t.name) AS technologies, collect(DISTINCT s.name) AS services
    ORDER BY a.published_date DESC LIMIT 5""",
]

# ------------------------------------------
# 3. ToolsRetriever + GraphRAG assembly
# ------------------------------------------


from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import RawSearchResult, RetrieverResult


class HybridFallbackRetriever(Retriever):
    """Retriever that asks ``tools_retriever`` first and ``fallback_retriever`` second.

    The fallback retriever is used when the primary search raises, returns
    nothing, or returns a result without items.
    """

    # NOTE(review): presumably read by the Retriever base class to skip its
    # Neo4j version check - confirm against neo4j_graphrag.
    VERIFY_NEO4J_VERSION = False

    def __init__(self, tools_retriever: Retriever, fallback_retriever: Retriever) -> None:
        """Keep both retrievers and initialise the base class with the primary's driver."""
        self.tools_retriever = tools_retriever
        self.fallback_retriever = fallback_retriever
        super().__init__(driver=tools_retriever.driver)

    def get_search_results(self, *args: Any, **kwargs: Any) -> RawSearchResult:
        """Return an empty raw result.

        ``search`` is overridden below and delegates to the wrapped
        retrievers, so this method contributes no records of its own.
        """
        return RawSearchResult(records=[])

    def search(self, query_text: str = "", **kwargs: Any) -> RetrieverResult:
        """Search with the primary retriever and fall back when it yields nothing.

        Args:
            query_text: The user query.
            **kwargs: Forwarded unchanged to whichever retriever is called.

        Returns:
            The primary result when it has items, otherwise the result of the
            fallback retriever.

        Raises:
            Exception: Whatever the fallback retriever raises; errors of the
                primary retriever are logged and trigger the fallback.
        """
        try:
            res = self.tools_retriever.search(query_text=query_text, **kwargs)
        except Exception:
            # A failing primary retriever is exactly the situation the
            # fallback exists for; keep the traceback in the log.
            logging.getLogger(__name__).warning(
                "Primary retriever failed; using the fallback retriever.",
                exc_info=True,
            )
            res = None
        if not res or not res.items:
            return self.fallback_retriever.search(query_text=query_text, **kwargs)
        return res


class CustomRagTemplate(RagTemplate):
    """RagTemplate variant whose prompt is filled from a context and a question only."""

    EXPECTED_INPUTS = ["context", "query_text"]

    def format(self, query_text: str, context: str, examples: str = "") -> str:
        """Fill the template with ``query_text`` and ``context``.

        ``examples`` is accepted so the signature stays compatible with the
        parent class (type checking); it is never inserted into the prompt.
        """
        # Reference the argument so unused-variable checks (Vulture) stay quiet.
        _ = examples
        values = {"query_text": query_text, "context": context}
        return self._format(**values)


_prompt_template = CustomRagTemplate(
    template="""당신은 AI 및 ν•€ν…Œν¬ 기술 νŠΈλ Œλ“œ μ „λ¬Έκ°€μ΄μž, μ·¨μ—… μ€€λΉ„μƒμ˜ μ—­λŸ‰ 뢄석을 λ•λŠ” μ „λž΅ μ»¨μ„€ν„΄νŠΈμž…λ‹ˆλ‹€.
λ°˜λ“œμ‹œ μ•„λž˜ 제곡된 [μ»¨ν…μŠ€νŠΈ(Neo4j 지식 κ·Έλž˜ν”„ 검색 κ²°κ³Ό)]에 κΈ°λ°˜ν•΄μ„œλ§Œ λ‹΅λ³€ν•˜κ³ , μ»¨ν…μŠ€νŠΈμ— κ·Όκ±°ν•˜μ§€ μ•Šμ€ 사싀을 μ§€μ–΄λ‚΄κ±°λ‚˜ κ°€μƒμ˜ 링크(example.com λ“±)λ₯Ό μ ˆλŒ€ μƒμ„±ν•˜μ§€ λ§ˆμ„Έμš”.

닡변은 λŒ€μ€‘μ΄λ‚˜ μ·¨μ—… 쀀비생이 μ‹€μ§ˆμ μœΌλ‘œ νŠΈλ Œλ“œλ₯Ό 깊이 있게 νŒŒμ•…ν•˜κ³  μžμ†Œμ„œ/λ©΄μ ‘ 등에 즉각 ν™œμš©ν•  수 μžˆλ„λ‘, μ•„λž˜μ˜ [κ³ μ • λΈŒλ¦¬ν•‘ λ³΄κ³ μ„œ 포맷]을 **토씨 ν•˜λ‚˜ 틀리지 μ•Šκ³  μ—„κ²©νžˆ μ€€μˆ˜**ν•˜μ—¬ 맀우 체계적이고 κΉ”λ”ν•œ λ§ˆν¬λ‹€μš΄ μ–‘μ‹μœΌλ‘œ μ •μ„±μŠ€λŸ½κ²Œ λΈŒλ¦¬ν•‘ν•΄ μ£Όμ„Έμš”.

β˜… [μ€‘μš” - 가독성 및 κ°œν–‰ κ·œμΉ™]:
각 μ£Όμš” μ„Ήμ…˜(###) μ‚¬μ΄μ—λŠ” 무쑰건 빈 쀄을 2쀄 이상 μΆ”κ°€ν•˜κ³ , λͺ¨λ“  κ°œλ³„ λͺ©λ‘ 기호(- 및 **) ν•­λͺ© 사이사이에도 λ°˜λ“œμ‹œ 1쀄 μ΄μƒμ˜ 빈 쀄(κ°œν–‰)을 μ‚½μž…ν•˜μ—¬ μ‹œκ°μ  가독성을 κ·ΉλŒ€ν™”ν•΄ μ£Όμ„Έμš”.

---

# πŸ“‹ [FinGraph AI 뢄석 λΈŒλ¦¬ν•‘]

### 1. πŸ“Š ν•œ 쀄 μš”μ•½ & 핡심 νŠΈλ Œλ“œ

- **ν•œ 쀄 μš”μ•½**: [ν•΄λ‹Ή νŠΈλ Œλ“œμ˜ 핡심 μš”μ μ„ 단 ν•œ μ€„λ‘œ λͺ…λ£Œν•˜κ²Œ μš”μ•½]

- **μ£Όμš” μΈμ‚¬μ΄νŠΈ**: [이 μ΄μŠˆκ°€ ν˜„μž¬ IT/AI 및 금육 ν•€ν…Œν¬ 업계 전체에 λ˜μ§€λŠ” 핡심 화두 기재]


### 2. πŸ” 상세 뢄석 및 팩트 정리

[μ»¨ν…μŠ€νŠΈμ— 기둝된 μ‹€μ œ 사싀 관계듀을 근거둜 ꡬ체적 사싀을 정리]

- **이슈 μ „κ°œ**: [ꡬ체적인 이슈 λ°œμƒ λ°°κ²½ 및 μ§„ν–‰ κ²½κ³Ό]

- **κΈ°μ—… 동ν–₯**: [κ΄€λ ¨ 핡심 κΈ°μ—…λ“€μ˜ μ‹€λ¬Ό λΉ„μ¦ˆλ‹ˆμŠ€ μ›€μ§μž„ 및 λŒ€μ‘ 행보. μ»¨ν…μŠ€νŠΈμ— μ—¬λŸ¬ κΈ°μ—…/기술이 μžˆλ‹€λ©΄ λͺ¨λ‘ μ–ΈκΈ‰]

- **기술 νŠΈλ Œλ“œ**: [μ»¨ν…μŠ€νŠΈμ— λ“±μž₯ν•˜λŠ” 핡심 AI κΈ°μˆ λ“€μ„ 비ꡐ/λΆ„λ₯˜ν•˜μ—¬ 전체 νŠΈλ Œλ“œ 흐름 뢄석]

- **인프라/μ‚¬νšŒμ  μš”μΈ**: [μ „λ ₯망 λΆ€μ‘±, λŒ€μ€‘μ  λΆˆμ•ˆκ°, ν•˜λ“œμ›¨μ–΄μ  μ œμ•½ 사항 λ“± 핡심 μš”μΈ]


### 3. πŸ’‘ μ·¨μ—…/μžμ†Œμ„œ/λ©΄μ ‘ μ‹€μ „ κ°€μ΄λ“œ

[μ§€μ›μžκ°€ λ©΄μ ‘μ΄λ‚˜ μžκΈ°μ†Œκ°œμ„œμ—μ„œ μ°¨λ³„ν™”λœ 톡찰을 보여쀄 수 μžˆλŠ” 방법 μ œμ‹œ]

- **금육/IT 업계 μ‹œμ‚¬μ **: [κ±°μ‹œμ μΈ νŒŒκΈ‰νš¨κ³Όμ™€ 지속가λŠ₯μ„± 관점 μ œμ‹œ]

- **μ‹€μ „ μžμ†Œμ„œ/λ©΄μ ‘ ν™œμš© Tip**: [μ§€μ›λ™κΈ°λ‚˜ μ—­λŸ‰ κΈ°μˆ μ„œ μž‘μ„± μ‹œ 본인의 μ—­λŸ‰κ³Ό μ–΄λ–»κ²Œ μ—°κ³„ν•˜μ—¬ 풀어낼지에 λŒ€ν•œ 맞좀 κ°€μ΄λ“œ]


### πŸ“° 4. κ·Όκ±° λ‰΄μŠ€ 좜처 (GraphRAG 검색 기사)

> μ»¨ν…μŠ€νŠΈμ— μ‹€μ œλ‘œ μ‘΄μž¬ν•˜λŠ” 기사 URL만 κΈ°μž¬ν•˜κ³ , μ‘΄μž¬ν•˜μ§€ μ•ŠλŠ” κΈ°μ‚¬λŠ” μ ˆλŒ€ μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
> κ²€μƒ‰λœ 기사가 μžˆλŠ” 경우 μ•„λž˜ ν˜•μ‹μœΌλ‘œ μ—΄κ±°ν•˜κ³ , μ—†μœΌλ©΄ 이 μ„Ήμ…˜μ„ μƒλž΅ν•˜μ„Έμš”.
>
> μ˜ˆμ‹œ:
> - *[기사 제λͺ©](기사 URL)* β€” λ³΄λ„μΌμž

---

질문: {query_text}

[μ»¨ν…μŠ€νŠΈ]
{context}

λ‹΅λ³€:""",
    expected_inputs=["context", "query_text"]
)


class LazyGraphRAG:
    """μž„ν¬νŠΈ μ‹œμ μ— DB 연결을 λ°©μ§€ν•˜κ³  μ‹€μ œ 호좜될 λ•Œ GraphRAG μΈμŠ€ν„΄μŠ€λ₯Ό μ΄ˆκΈ°ν™”ν•˜λŠ” μ§€μ—° 평가 ν”„λ‘μ‹œ"""

    def __init__(self) -> None:
        self._graphrag: Any = None
        self._hybrid_retriever: Any = None  # ν’ˆμ§ˆ ν‰κ°€μš© 직접 μ ‘κ·Ό κ°€λŠ₯ν•œ λ¦¬νŠΈλ¦¬λ²„
        self._rag_llm: Any = None           # 일반 지식 λ‹΅λ³€ μƒμ„±μš© LLM

    def _init_once(self) -> None:
        """Build the LLM, the retrievers and the GraphRAG pipeline on first use.

        Returns immediately when the pipeline already exists. Otherwise it
        creates the OpenAI LLM and embedder, opens a Neo4j driver, exposes a
        vector+Cypher retriever and a text-to-Cypher retriever as tools of a
        ToolsRetriever, wraps that in a HybridFallbackRetriever (with the
        vector retriever as fallback) and finally builds the GraphRAG object.

        State is stored on ``self`` only after every step has succeeded. If a
        step after opening the driver fails, the driver is closed before the
        error propagates, so repeated failing attempts do not pile up open
        drivers.
        """
        if self._graphrag is not None:
            return

        # The OpenAI client and embedder are created lazily as well
        # (original note: prevents CI crashes).
        llm = OpenAILLM(model_name="gpt-4o-mini", model_params={"temperature": 0})
        embedder = OpenAIEmbeddings(model="text-embedding-3-small")

        driver = get_neo4j_driver()
        try:
            vector_cypher_retriever = VectorCypherRetriever(
                driver=driver,
                index_name=INDEX_NAME,
                retrieval_query=_retrieval_query,
                embedder=embedder,
            )

            text2cypher_retriever = Text2CypherRetriever(
                driver=driver,
                llm=llm,
                neo4j_schema=_get_schema(driver),
                examples=_examples,
            )

            tools_retriever = ToolsRetriever(
                driver=driver,
                llm=llm,
                tools=[
                    vector_cypher_retriever.convert_to_tool(
                        name="vector_retriever",
                        description=(
                            "λ‰΄μŠ€ λ³Έλ¬Έ 의미 μœ μ‚¬λ„ 기반 검색 + μ—°κ²°λœ μ—”ν‹°ν‹°(κΈ°μ—…Β·κΈ°μˆ Β·μ„œλΉ„μŠ€Β·λΆ„μ•Ό) 관계 κ·Έλž˜ν”„ 탐색. "
                            "νŠΉμ • 주제/κΈ°μ—…/κΈ°μˆ μ— λŒ€ν•΄ λ‰΄μŠ€ 기사 및 κ΄€λ ¨ κ·Έλž˜ν”„ 관계λ₯Ό ν•¨κ»˜ 뢄석할 λ•Œ μ‚¬μš©. "
                            "예: 'ν˜„λŒ€μ°¨ AI λ‰΄μŠ€', 'νŠΉμ • 기술의 적용 사둀'."
                        ),
                    ),
                    text2cypher_retriever.convert_to_tool(
                        name="text2cypher_retriever",
                        description=(
                            "μžμ—°μ–΄λ₯Ό Neo4j Cypher 쿼리둜 λ³€ν™˜ν•˜μ—¬ κ·Έλž˜ν”„ ꡬ쑰λ₯Ό 집계·탐색. "
                            "'κ°€μž₯ 많이 μ–ΈκΈ‰λœ 기술', 'νŠΈλ Œλ“œ 뢄석', 'νŠΉμ • κΈ°μ—…μ˜ μ„œλΉ„μŠ€ λͺ©λ‘', "
                            "'μ–΄λ–€ 기업이 X κΈ°μˆ μ„ κ°œλ°œν•˜λ‚˜', '졜근 λ‰΄μŠ€ μš”μ•½' λ“± "
                            "집계(COUNT/ORDER BY)λ‚˜ ꡬ쑰적 관계 μ§ˆμ˜μ— λ°˜λ“œμ‹œ μ‚¬μš©."
                        ),
                    ),
                ],
            )

            hybrid_retriever = HybridFallbackRetriever(
                tools_retriever=tools_retriever,
                fallback_retriever=vector_cypher_retriever,
            )

            pipeline = GraphRAG(
                llm=llm,
                retriever=hybrid_retriever,
                prompt_template=_prompt_template,
            )
        except Exception:
            # Nothing below took ownership of the driver yet; release it so a
            # later retry starts from a clean slate.
            driver.close()
            raise

        # Publish everything at once so no half-initialised state is visible.
        self._rag_llm = llm
        self._hybrid_retriever = hybrid_retriever
        self._graphrag = pipeline

    def _is_context_sufficient(self, query_text: str, history: list, retriever_result: Any) -> bool:
        """κ²€μƒ‰λœ μ»¨ν…μŠ€νŠΈκ°€ 질문 및 이전 λŒ€ν™” 흐름에 μ‹€μ§ˆμ μœΌλ‘œ 도움이 λ˜λŠ” 금육/기술 λ‰΄μŠ€ 데이터인지 GPT-4o-mini둜 νŒλ‹¨"""
        if retriever_result is None:
            return False
        if not hasattr(retriever_result, "items") or not retriever_result.items:
            return False
        total_content = " ".join(
            getattr(item, "content", "") for item in retriever_result.items
        ).strip()
        if len(total_content) < 100:
            return False

        # GPT-4o-mini 기반 μ§€λŠ₯적 μžκ°€ 진단 (이전 λŒ€ν™” νžˆμŠ€ν† λ¦¬ 및 질문의 λ§₯락 κ²°ν•© νŒμ •)
        try:
            assert self._rag_llm is not None
            context_snippet = total_content[:800]

            # 이전 λŒ€ν™” νžˆμŠ€ν† λ¦¬μ˜ λ§₯락 μš”μ•½ μΆ”μΆœ (졜근 3개 λ©”μ‹œμ§€)
            normalized_history = self._normalize_history(history)
            history_summary = "μ—†μŒ"
            if normalized_history:
                history_summary = "\n".join(
                    f"- {msg['role']}: {msg['content'][:150]}" 
                    for msg in normalized_history[-3:]
                )

            routing_prompt = (
                "당신은 금육/기술 νŠΈλ Œλ“œ RAG μ‹œμŠ€ν…œμ˜ μ§€λŠ₯ν˜• λΌμš°ν„°μž…λ‹ˆλ‹€.\n"
                "μ‚¬μš©μžμ˜ [ν˜„μž¬ 질문] 및 [졜근 λŒ€ν™” νžˆμŠ€ν† λ¦¬]κ°€ μ•„λž˜ 제곡된 [κ²€μƒ‰λœ λ‰΄μŠ€ 데이터]와 의미적으둜 λ°€μ ‘ν•˜κ²Œ μ—°κ΄€λ˜μ–΄ 있고, "
                "ν•΄λ‹Ή 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— μ‹€μ œ ꡬ체적이고 μ‹ λ’°ν•  수 μžˆλŠ” 닡변을 μ œκ³΅ν•  수 μžˆλŠ”μ§€ ν‰κ°€ν•˜μ„Έμš”.\n\n"
                "특히, ν˜„μž¬ 질문이 '그거에 λŒ€ν•΄ μ’€ 더 μ„€λͺ…ν•΄μ€˜'λ‚˜ 'μžμ†Œμ„œ νŒμ„ 더 λ‹€λ“¬μ–΄μ€˜'와 같은 후속 λŒ€ν™”ν˜• 질문일 경우, "
                "[졜근 λŒ€ν™” νžˆμŠ€ν† λ¦¬]에 λͺ…μ‹œλœ μ£Όμš” 금육/기술 νŠΈλ Œλ“œ 주제(예: μ‚Όμ„±μ „μž AI, 카카였 AI λ“±)κ°€ "
                "μ•„λž˜ λ‰΄μŠ€ λ°μ΄ν„°μ˜ 핡심 λ‚΄μš©κ³Ό μΌμΉ˜ν•˜λŠ”μ§€ μ’…ν•©μ μœΌλ‘œ κ³ λ €ν•΄μ•Ό ν•©λ‹ˆλ‹€.\n\n"
                "λ§Œμ•½ 질문 및 λŒ€ν™” λ§₯락이 μ•„λž˜ λ‰΄μŠ€ 데이터와 μ „ν˜€ λ¬΄κ΄€ν•œ 일반 상식, 일상적인 λŒ€ν™”, μˆ˜ν•™, 예술 λ“± "
                "지식 κ·Έλž˜ν”„(λ‰΄μŠ€ λ°μ΄ν„°λ² μ΄μŠ€)에 μ—†λŠ” 주제의 질문이라면 λ°˜λ“œμ‹œ 'NO'라고 λ‹΅ν•΄μ•Ό ν•©λ‹ˆλ‹€.\n"
                "λ‰΄μŠ€ 팩트 데이터λ₯Ό κ²°ν•©ν•˜μ—¬ μ˜¬λ°”λ₯Έ 닡변을 μž‘μ„±ν•  수 μžˆλŠ” λ§₯락이라면 'YES', κ·Έλ ‡μ§€ μ•Šλ‹€λ©΄ 'NO'라고만 λ‹΅ν•˜μ„Έμš”.\n\n"
                f"[졜근 λŒ€ν™” νžˆμŠ€ν† λ¦¬]\n{history_summary}\n\n"
                f"[ν˜„μž¬ 질문]\n{query_text}\n\n"
                f"[κ²€μƒ‰λœ λ‰΄μŠ€ 데이터]\n{context_snippet}\n\n"
                "νŒμ • (YES λ˜λŠ” NO둜만 λ‹΅λ³€):"
            )
            # μ•„μ£Ό λΉ λ₯΄κ³  μ €λ ΄ν•œ 단일 토큰 YES/NO 응닡 생성
            response = self._rag_llm.invoke(
                input=routing_prompt,
                model_params={"temperature": 0, "max_tokens": 5}
            )
            decision = str(response.content).strip().upper()
            return "YES" in decision
        except Exception:
            # μ˜ˆμ™Έ λ°œμƒ μ‹œ μ•ˆμ „μ„ μœ„ν•΄ 기쑴의 κΈ°λ³Έ 길이 기반 νŒμ •μœΌλ‘œ 폴백
            return len(total_content) >= 100

    def _normalize_history(self, history: list) -> list:
        """Gradio νžˆμŠ€ν† λ¦¬(dict λ˜λŠ” tuple ν˜•μ‹)λ₯Ό LLM message_history ν˜•μ‹μœΌλ‘œ μ •κ·œν™”"""
        normalized: list = []
        for msg in history:
            if isinstance(msg, dict) and "role" in msg and "content" in msg:
                normalized.append({"role": msg["role"], "content": str(msg["content"])})
            elif isinstance(msg, (list, tuple)) and len(msg) == 2:
                if msg[0]:
                    normalized.append({"role": "user", "content": str(msg[0])})
                if msg[1]:
                    normalized.append({"role": "assistant", "content": str(msg[1])})
        return normalized

    def _generate_general_answer(self, query_text: str, history: list) -> str:
        """κ·Έλž˜ν”„ 검색 κ²°κ³Ό 없이 GPT-4o-mini 일반 μ§€μ‹μœΌλ‘œ λ‹΅λ³€ 생성 (λŒ€ν™” νžˆμŠ€ν† λ¦¬ 반영)"""
        assert self._rag_llm is not None
        system_prompt = (
            "당신은 AI 및 ν•€ν…Œν¬ 기술 νŠΈλ Œλ“œ μ „λ¬Έκ°€μ΄μž, μ·¨μ—… μ€€λΉ„μƒμ˜ μ—­λŸ‰ 뢄석을 λ•λŠ” μ „λž΅ μ»¨μ„€ν„΄νŠΈμž…λ‹ˆλ‹€.\n"
            "ν˜„μž¬ FinGraph 지식 κ·Έλž˜ν”„(Neo4j GraphRAG)μ—μ„œ κ΄€λ ¨ λ‰΄μŠ€ 기사λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.\n"
            "이전 λŒ€ν™” λ§₯락을 μΆ©λΆ„νžˆ λ°˜μ˜ν•˜κ³ , GPT-4o-mini의 일반 ν•™μŠ΅ 데이터에 κΈ°λ°˜ν•˜μ—¬ μ΅œμ„ μ„ λ‹€ν•΄ μ „λ¬Έμ μœΌλ‘œ λ‹΅λ³€ν•΄ μ£Όμ„Έμš”.\n\n"
            "[μ€‘μš” μ§€μΉ¨]\n"
            "- μ‹€μ œ μ‘΄μž¬ν•˜μ§€ μ•ŠλŠ” λ‰΄μŠ€ 링크, λ‚ μ§œ, κ°€μ§œ URL을 μ ˆλŒ€ μƒμ„±ν•˜μ§€ λ§ˆμ„Έμš”.\n"
            "- κ°€λŠ₯ν•˜λ‹€λ©΄ μ·¨μ—… 쀀비생이 λ©΄μ ‘/μžμ†Œμ„œμ— ν™œμš©ν•  수 μžˆλŠ” μ‹€μ§ˆμ μΈ μΈμ‚¬μ΄νŠΈλ₯Ό 포함해 μ£Όμ„Έμš”.\n"
            "- 닡변이 일반 AI ν•™μŠ΅ 데이터 κΈ°λ°˜μž„μ„ μˆ¨κΈ°μ§€ 말고 μžμ—°μŠ€λŸ½κ²Œ μ–ΈκΈ‰ν•˜λ©° μ‹œμž‘ν•˜μ„Έμš”."
        )
        normalized_history = self._normalize_history(history)
        response = self._rag_llm.invoke(
            input=query_text,
            message_history=normalized_history,
            system_instruction=system_prompt,
        )
        return str(response.content)

    def search_with_fallback(self, query_text: str, history: list) -> HybridResult:
        """Retrieve, judge the context, then answer from the graph or from general knowledge.

        Retrieval runs exactly once. The retrieved items are both the input of
        the sufficiency check and the context of the graph-mode answer, so the
        answer is always based on the context that was actually judged, and
        the returned ``retriever_result`` is that same object.

        Args:
            query_text: The user's question.
            history: Earlier conversation turns (Gradio format).

        Returns:
            HybridResult with the answer, the mode (``"graph"`` or
            ``"general"``) and, in graph mode, the retriever result.
        """
        self._init_once()

        # Step 1: retrieve once. The hybrid retriever may itself call the LLM
        # (it wraps the tool-selecting retriever built in _init_once).
        retriever_result = self._hybrid_retriever.search(query_text=query_text)

        # Step 2: route on the quality of the retrieved context.
        if not self._is_context_sufficient(query_text, history, retriever_result):
            # Step 3b: general knowledge - direct LLM call that includes the history.
            answer = self._generate_general_answer(query_text, history)
            return HybridResult(answer=answer, mode="general", retriever_result=None)

        # Step 3a: graph mode - build the briefing from the context that was
        # just evaluated instead of triggering a second retrieval.
        # NOTE(review): this mirrors what GraphRAG.search is understood to do
        # (newline-joined item contents -> prompt template -> LLM with the
        # template's system instructions) - confirm against the installed
        # neo4j_graphrag version.
        context = "\n".join(str(item.content) for item in retriever_result.items)
        template = self._graphrag.prompt_template
        prompt = template.format(query_text=query_text, context=context)
        response = self._rag_llm.invoke(
            input=prompt,
            system_instruction=getattr(template, "system_instructions", None) or None,
        )
        return HybridResult(
            answer=str(response.content),
            mode="graph",
            retriever_result=retriever_result,
        )

    def search(self, *args: Any, **kwargs: Any) -> Any:
        self._init_once()
        assert self._graphrag is not None
        return self._graphrag.search(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        self._init_once()
        return getattr(self._graphrag, name)


# app.py imports and uses this object directly (no database connection is attempted at import time).
graphrag = LazyGraphRAG()