File size: 22,925 Bytes
cb92864 622f700 cb92864 79ef842 08fb91a 622f700 cb92864 08fb91a cb92864 08fb91a cb92864 7f57ffc 79ef842 e7d3bfe 79ef842 7f57ffc 9675b2d 7f57ffc cb92864 9675b2d cb92864 ff0395c cb92864 3e23aae cb92864 ff0395c cb92864 3e23aae cb92864 08fb91a 9675b2d cb92864 08fb91a cb92864 47e7138 cb92864 47e7138 622f700 47e7138 cb92864 47e7138 622f700 47e7138 cb92864 64ad66f 47e7138 622f700 64ad66f 622f700 64ad66f 622f700 64ad66f 47e7138 ff0395c 622f700 ff0395c 47e7138 3e23aae 47e7138 3e23aae 47e7138 3e23aae 47e7138 3e23aae cb92864 1ecde19 64ad66f 1ecde19 64ad66f 7f57ffc 64ad66f 0b09cae 64ad66f 7f57ffc 64ad66f 09de128 64ad66f 495d5e7 09de128 495d5e7 9731058 09de128 495d5e7 9731058 495d5e7 9731058 495d5e7 09de128 495d5e7 9731058 495d5e7 9731058 495d5e7 09de128 495d5e7 9731058 3e23aae 9731058 495d5e7 9731058 495d5e7 09de128 495d5e7 9731058 495d5e7 9731058 495d5e7 3e23aae 495d5e7 3e23aae 09de128 cb92864 64ad66f cb92864 64ad66f cb92864 9675b2d 79ef842 9675b2d 6f726d8 79ef842 9675b2d eb29e6d e7d3bfe eb29e6d 79ef842 9675b2d 79ef842 9675b2d 79ef842 9675b2d 09de128 3e23aae 9675b2d 3e23aae 9675b2d 79ef842 9675b2d 79ef842 9675b2d 79ef842 9675b2d 79ef842 e7d3bfe 79ef842 e7d3bfe 79ef842 e7d3bfe 79ef842 e7d3bfe 79ef842 e7d3bfe 79ef842 9675b2d fd7f235 9675b2d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 | """
finRetrieval.py — GraphRAG search module
=====================================
Imported by app.py to back the Gradio chatbot.

Usage:
    from src.retrieval.finRetrieval import graphrag
    response = graphrag.search(query_text="삼성전자 AI 서비스는?")
    print(response.answer)
"""
import logging
import os
from dataclasses import dataclass
from typing import Any
# Silence Neo4j DBMS server warnings (e.g. the deprecated vector queryNodes notice) in the logs.
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j.notifications").setLevel(logging.ERROR)
import dotenv
import neo4j
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.generation import GraphRAG, RagTemplate
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.retrievers import (
Text2CypherRetriever,
ToolsRetriever,
VectorCypherRetriever,
)
dotenv.load_dotenv()
@dataclass
class HybridResult:
    """Unified response built either from GraphRAG retrieval or from general LLM knowledge."""
    answer: str  # final answer text
    mode: str  # "graph": based on graph search | "general": based on GPT-4o-mini general knowledge
    retriever_result: Any = None  # RetrieverResult (only meaningful when mode == "graph")
def get_neo4j_driver() -> neo4j.Driver:
    """Open a Neo4j driver and confirm that the server is reachable.

    Connection settings are read from environment variables:

    * ``NEO4J_URI`` (default ``neo4j://localhost:7687``)
    * ``NEO4J_CLIENT_ID`` / ``NEO4J_CLIENT_SECRET`` - tried first when both are set
    * ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` - used otherwise, or when the
      client-credential attempt fails (defaults ``neo4j`` / ``password``)

    Returns:
        A driver whose ``verify_connectivity()`` call succeeded.

    Raises:
        Exception: whatever the driver raises when the username/password
            connection cannot be created or verified.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        candidate = None
        try:
            candidate = neo4j.GraphDatabase.driver(uri, auth=(client_id, client_secret))
            candidate.verify_connectivity()
            return candidate
        except Exception:
            # Best-effort attempt: release the unusable driver instead of
            # leaking it, record why it failed, then fall through to the
            # username/password credentials.
            if candidate is not None:
                candidate.close()
            logging.getLogger(__name__).warning(
                "Neo4j client-credential login failed; falling back to username/password.",
                exc_info=True,
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    driver = neo4j.GraphDatabase.driver(uri, auth=(username, password))
    try:
        driver.verify_connectivity()
    except Exception:
        # Do not hand back (or leak) a driver that cannot reach the server.
        driver.close()
        raise
    return driver
INDEX_NAME = "content_vector_index"  # passed as index_name to the VectorCypherRetriever

# ------------------------------------------
# 2. Retriever constants and settings
# ------------------------------------------
# Cypher passed as `retrieval_query` to the VectorCypherRetriever. It starts
# from `node` (presumably the chunk matched by the vector search - confirm
# against the retriever docs), walks to its article and the companies,
# technologies, services and fields around it, and adds up to three related
# articles. The in-query `//` comment used to be split over two physical
# lines, which left a dangling non-Cypher line in the statement; it is now a
# single ASCII comment line.
_retrieval_query = """
MATCH (node)<-[:HAS_CHUNK]-(article:Article)
OPTIONAL MATCH (article)-[:MENTIONS]->(company:AICompany)
OPTIONAL MATCH (company)-[:DEVELOPS]->(tech:AITechnology)
OPTIONAL MATCH (company)-[:DEVELOPS]->(svc:AIService)
OPTIONAL MATCH (article)-[:MENTIONS]->(field:AIField)
// Expand to related articles that mention the same company/technology/service (cross-article search)
OPTIONAL MATCH (related_article:Article)
WHERE related_article <> article
AND (
EXISTS { (related_article)-[:MENTIONS]->(:AICompany)<-[:MENTIONS]-(article) }
OR EXISTS { (related_article)-[:MENTIONS]->(:AITechnology)<-[:MENTIONS]-(article) }
OR EXISTS { (related_article)-[:MENTIONS]->(:AIService)<-[:MENTIONS]-(article) }
)
WITH
node, article, company, tech, svc, field,
collect(DISTINCT related_article.title)[..3] AS related_titles,
collect(DISTINCT related_article.url)[..3] AS related_urls
RETURN
node.chunk AS chunk,
article.title AS article_title,
article.url AS article_url,
article.published_date AS article_date,
collect(DISTINCT company.name) AS companies,
collect(DISTINCT tech.name) AS technologies,
collect(DISTINCT svc.name) AS services,
collect(DISTINCT field.name) AS fields,
related_titles AS related_article_titles,
related_urls AS related_article_urls
"""
def _get_schema(driver: neo4j.Driver) -> str:
    """Render a plain-text summary of the graph schema.

    Two queries run in one session: node types with their property names
    (``db.schema.nodeTypeProperties``) and up to 30 distinct
    ``(source label)-[type]->(target label)`` triples. Only the first label
    of each endpoint is reported.

    Args:
        driver: open Neo4j driver used to run the two schema queries.

    Returns:
        A header line, a node section (one ``- type: [props]`` line per node
        type), a blank line, and a relationship section (one
        ``- (src)-[:REL]->(tgt)`` line per triple), ending with a newline.
    """
    with driver.session() as session:
        node_rows = session.run(
            "CALL db.schema.nodeTypeProperties() "
            "YIELD nodeType, propertyName "
            "RETURN nodeType, collect(propertyName) as props"
        ).data()
        rel_rows = session.run(
            "MATCH (n)-[r]->(m) RETURN DISTINCT labels(n)[0] as src, type(r) as rel, labels(m)[0] as tgt LIMIT 30"
        ).data()

    # The two section labels ("노드" = nodes, "관계" = relationships) were
    # mis-encoded and one literal was split across physical lines, which does
    # not parse; they are restored here from the garbled bytes.
    lines = ["=== Neo4j Schema ===", "노드:"]
    lines.extend(f"- {row['nodeType']}: {row['props']}" for row in node_rows)
    lines.append("")
    lines.append("관계:")
    lines.extend(f"- ({row['src']})-[:{row['rel']}]->({row['tgt']})" for row in rel_rows)
    return "\n".join(lines) + "\n"
# Few-shot examples handed to Text2CypherRetriever as `examples`. Each entry
# is a "USER INPUT: ..." line followed by "CYPHER QUERY:" and the Cypher
# statement expected for that question.
# NOTE(review): the Korean text in these literals appears mis-encoded (UTF-8
# bytes decoded with a single-byte Greek code page), and several lines are
# split mid-word by stray line breaks - including inside Cypher string
# literals such as the CONTAINS arguments. The literals are reproduced as
# found; restore them from the original source before relying on them.
_examples = [
"""USER INPUT: μΉ΄μΉ΄μ€νμ΄μ AI μλΉμ€ λͺ©λ‘μ μλ €μ£ΌμΈμ
CYPHER QUERY:
MATCH (c:AICompany {name:"μΉ΄μΉ΄μ€νμ΄"})-[:DEVELOPS]->(s:AIService)
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
RETURN s.name AS name, s.description AS description, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: μ νμνμ΄ κ°λ° μ€μΈ AI κΈ°μ μ?
CYPHER QUERY:
MATCH (c:AICompany {name:"μ νμν"})-[:DEVELOPS]->(t:AITechnology)
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN t.name AS name, t.description AS description, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: μ΄λ€ κΈμ΅μ¬κ° λ‘보μ΄λλ°μ΄μ κΈ°μ μ κ°λ°νλμ?
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(t:AITechnology)
WHERE t.name CONTAINS "λ‘보μ΄λλ°μ΄μ " OR t.name CONTAINS "μκ³ λ¦¬μ¦"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN c.name AS company_name, t.name AS tech_name, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: κΈμ΅μ΄λ νν
ν¬ λΆμΌμ κΈ°μ μ μ μ©νκ³ μλ κΈ°μ
λ€μ μ΄λμΌ?
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(t)-[:USED_IN]->(f:AIField)
WHERE f.name CONTAINS "κΈμ΅" OR f.name CONTAINS "νν
ν¬"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN DISTINCT c.name AS company_name, t.name AS tech_name, f.name AS field_name, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: κΈμ΅AI λΆμΌμ κ°μ₯ μ κ·Ήμ μΈ κΈ°μ
TOP 3μ λν μλΉμ€
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(s)-[:USED_IN]->(f:AIField)
WHERE f.name CONTAINS "κΈμ΅" OR f.name CONTAINS "νν
ν¬"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
RETURN DISTINCT c.name AS company_name, s.name AS service_name, f.name AS field_name, a.title AS article_title, a.url AS article_url
LIMIT 3""",
"""USER INPUT: μ΅κ·Ό κΈμ΅ AI κ΄λ ¨ λ΄μ€ κΈ°μ¬λ₯Ό μμ½ν΄μ€
CYPHER QUERY:
MATCH (a:Article)-[:HAS_CHUNK]->(c:Content)
RETURN a.title AS title, a.url AS url, a.published_date AS published_date, c.chunk AS chunk
ORDER BY a.published_date DESC
LIMIT 3""",
"""USER INPUT: μ΅κ·Ό κ°μ₯ κ΄μ¬μ΄ λμ κΈμ΅ AI κΈ°μ μ΄ λμΌ?
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..3] AS article_titles, collect(DISTINCT a.url)[..3] AS article_urls
ORDER BY article_count DESC
RETURN t.name AS tech_name, t.description AS description, article_count, companies, article_titles, article_urls
LIMIT 5""",
"""USER INPUT: κΈμ΅ AI κΈ°μ νΈλ λλ₯Ό λΆμν΄μ€
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..2] AS article_titles, collect(DISTINCT a.url)[..2] AS article_urls
ORDER BY article_count DESC
RETURN t.name AS tech_name, article_count, companies, article_titles, article_urls
LIMIT 5""",
"""USER INPUT: ν μ€ λλ μΉ΄μΉ΄μ€νμ΄ κ΄λ ¨ κΈμ΅ AI λ΄μ€ μλ €μ€
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(c:AICompany)
WHERE c.name CONTAINS 'ν μ€' OR c.name CONTAINS 'μΉ΄μΉ΄μ€νμ΄'
OPTIONAL MATCH (a)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (a)-[:MENTIONS]->(s:AIService)
RETURN a.title AS article_title, a.url AS article_url, a.published_date AS article_date,
collect(DISTINCT c.name) AS companies, collect(DISTINCT t.name) AS technologies, collect(DISTINCT s.name) AS services
ORDER BY a.published_date DESC LIMIT 5""",
]
# ------------------------------------------
# 3. ToolsRetriever + GraphRAG assembly
# ------------------------------------------
from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import RawSearchResult, RetrieverResult
class HybridFallbackRetriever(Retriever):
    """Retriever that asks a primary retriever first and a fallback second.

    ``search`` returns the primary retriever's result when it contains items;
    otherwise it returns whatever the fallback retriever produces for the same
    query and keyword arguments.
    """

    # NOTE(review): presumably tells the base Retriever to skip its Neo4j
    # server-version check - confirm against neo4j_graphrag.
    VERIFY_NEO4J_VERSION = False

    def __init__(self, tools_retriever: Retriever, fallback_retriever: Retriever) -> None:
        """Keep both retrievers and reuse the primary retriever's driver."""
        self.tools_retriever = tools_retriever
        self.fallback_retriever = fallback_retriever
        super().__init__(driver=tools_retriever.driver)

    def get_search_results(self, *args: Any, **kwargs: Any) -> RawSearchResult:
        """Return an empty result; ``search`` is overridden and does the real work."""
        return RawSearchResult(records=[])

    def search(self, query_text: str = "", **kwargs: Any) -> RetrieverResult:
        """Run the primary retriever and fall back when it yields no items."""
        primary = self.tools_retriever.search(query_text=query_text, **kwargs)
        if primary and primary.items:
            return primary
        return self.fallback_retriever.search(query_text=query_text, **kwargs)
class CustomRagTemplate(RagTemplate):
    """RagTemplate variant whose prompt uses only ``context`` and ``query_text``."""

    EXPECTED_INPUTS = ["context", "query_text"]

    def format(self, query_text: str, context: str, examples: str = "") -> str:
        """Fill the template with the question and context; ``examples`` is ignored."""
        # `examples` stays in the signature to match the parent (mypy); it is
        # consumed here only so unused-variable checks (Vulture) stay quiet.
        del examples
        return self._format(query_text=query_text, context=context)
# Prompt used by GraphRAG for graph-backed answers. The template body is a
# fixed markdown briefing layout followed by the `{query_text}` and
# `{context}` placeholders.
# NOTE(review): the Korean text of this template appears mis-encoded (UTF-8
# bytes decoded with a single-byte Greek code page) and many lines are split
# mid-word by stray line breaks. The literal is reproduced as found; restore
# it from the original source before shipping.
_prompt_template = CustomRagTemplate(
    template="""λΉμ μ AI λ° νν
ν¬ κΈ°μ νΈλ λ μ λ¬Έκ°μ΄μ, μ·¨μ
μ€λΉμμ μλ λΆμμ λλ μ λ΅ μ»¨μ€ν΄νΈμ
λλ€.
λ°λμ μλ μ 곡λ [컨ν
μ€νΈ(Neo4j μ§μ κ·Έλν κ²μ κ²°κ³Ό)]μ κΈ°λ°ν΄μλ§ λ΅λ³νκ³ , 컨ν
μ€νΈμ κ·Όκ±°νμ§ μμ μ¬μ€μ μ§μ΄λ΄κ±°λ κ°μμ λ§ν¬(example.com λ±)λ₯Ό μ λ μμ±νμ§ λ§μΈμ.
λ΅λ³μ λμ€μ΄λ μ·¨μ
μ€λΉμμ΄ μ€μ§μ μΌλ‘ νΈλ λλ₯Ό κΉμ΄ μκ² νμ
νκ³ μμμ/λ©΄μ λ±μ μ¦κ° νμ©ν μ μλλ‘, μλμ [κ³ μ λΈλ¦¬ν λ³΄κ³ μ ν¬λ§·]μ **ν μ¨ νλ νλ¦¬μ§ μκ³ μ격ν μ€μ**νμ¬ λ§€μ° μ²΄κ³μ μ΄κ³ κΉλν λ§ν¬λ€μ΄ μμμΌλ‘ μ μ±μ€λ½κ² λΈλ¦¬νν΄ μ£ΌμΈμ.
β
[μ€μ - κ°λ
μ± λ° κ°ν κ·μΉ]:
κ° μ£Όμ μΉμ
(###) μ¬μ΄μλ 무쑰건 λΉ μ€μ 2μ€ μ΄μ μΆκ°νκ³ , λͺ¨λ κ°λ³ λͺ©λ‘ κΈ°νΈ(- λ° **) νλͺ© μ¬μ΄μ¬μ΄μλ λ°λμ 1μ€ μ΄μμ λΉ μ€(κ°ν)μ μ½μ
νμ¬ μκ°μ κ°λ
μ±μ κ·Ήλνν΄ μ£ΌμΈμ.
---
# π [FinGraph AI λΆμ λΈλ¦¬ν]
### 1. π ν μ€ μμ½ & ν΅μ¬ νΈλ λ
- **ν μ€ μμ½**: [ν΄λΉ νΈλ λμ ν΅μ¬ μμ μ λ¨ ν μ€λ‘ λͺ
λ£νκ² μμ½]
- **μ£Όμ μΈμ¬μ΄νΈ**: [μ΄ μ΄μκ° νμ¬ IT/AI λ° κΈμ΅ νν
ν¬ μ
κ³ μ 체μ λμ§λ ν΅μ¬ νλ κΈ°μ¬]
### 2. π μμΈ λΆμ λ° ν©νΈ μ 리
[컨ν
μ€νΈμ κΈ°λ‘λ μ€μ μ¬μ€ κ΄κ³λ€μ κ·Όκ±°λ‘ κ΅¬μ²΄μ μ¬μ€μ μ 리]
- **μ΄μ μ κ°**: [ꡬ체μ μΈ μ΄μ λ°μ λ°°κ²½ λ° μ§ν κ²½κ³Ό]
- **κΈ°μ
λν₯**: [κ΄λ ¨ ν΅μ¬ κΈ°μ
λ€μ μ€λ¬Ό λΉμ¦λμ€ μμ§μ λ° λμ ν보. 컨ν
μ€νΈμ μ¬λ¬ κΈ°μ
/κΈ°μ μ΄ μλ€λ©΄ λͺ¨λ μΈκΈ]
- **κΈ°μ νΈλ λ**: [컨ν
μ€νΈμ λ±μ₯νλ ν΅μ¬ AI κΈ°μ λ€μ λΉκ΅/λΆλ₯νμ¬ μ 체 νΈλ λ νλ¦ λΆμ]
- **μΈνλΌ/μ¬νμ μμΈ**: [μ λ ₯λ§ λΆμ‘±, λμ€μ λΆμκ°, νλμ¨μ΄μ μ μ½ μ¬ν λ± ν΅μ¬ μμΈ]
### 3. π‘ μ·¨μ
/μμμ/λ©΄μ μ€μ κ°μ΄λ
[μ§μμκ° λ©΄μ μ΄λ μκΈ°μκ°μμμ μ°¨λ³νλ ν΅μ°°μ 보μ¬μ€ μ μλ λ°©λ² μ μ]
- **κΈμ΅/IT μ
κ³ μμ¬μ **: [κ±°μμ μΈ νκΈν¨κ³Όμ μ§μκ°λ₯μ± κ΄μ μ μ]
- **μ€μ μμμ/λ©΄μ νμ© Tip**: [μ§μλκΈ°λ μλ κΈ°μ μ μμ± μ λ³ΈμΈμ μλκ³Ό μ΄λ»κ² μ°κ³νμ¬ νμ΄λΌμ§μ λν λ§μΆ€ κ°μ΄λ]
### π° 4. κ·Όκ±° λ΄μ€ μΆμ² (GraphRAG κ²μ κΈ°μ¬)
> 컨ν
μ€νΈμ μ€μ λ‘ μ‘΄μ¬νλ κΈ°μ¬ URLλ§ κΈ°μ¬νκ³ , μ‘΄μ¬νμ§ μλ κΈ°μ¬λ μ λ μ§μ΄λ΄μ§ λ§μΈμ.
> κ²μλ κΈ°μ¬κ° μλ κ²½μ° μλ νμμΌλ‘ μ΄κ±°νκ³ , μμΌλ©΄ μ΄ μΉμ
μ μλ΅νμΈμ.
>
> μμ:
> - *[κΈ°μ¬ μ λͺ©](κΈ°μ¬ URL)* β 보λμΌμ
---
μ§λ¬Έ: {query_text}
[컨ν
μ€νΈ]
{context}
λ΅λ³:""",
    expected_inputs=["context", "query_text"]
)
class LazyGraphRAG:
"""μν¬νΈ μμ μ DB μ°κ²°μ λ°©μ§νκ³ μ€μ νΈμΆλ λ GraphRAG μΈμ€ν΄μ€λ₯Ό μ΄κΈ°ννλ μ§μ° νκ° νλ‘μ"""
def __init__(self) -> None:
    """Create the proxy with every component unset; nothing is built here."""
    self._graphrag: Any = None
    self._hybrid_retriever: Any = None  # retriever kept directly accessible for quality evaluation
    self._rag_llm: Any = None  # LLM used to generate general-knowledge answers
def _init_once(self) -> None:
    """Build the LLM, embedder, driver, retrievers and GraphRAG pipeline on first use.

    Returns immediately once ``self._graphrag`` is set. The three instance
    attributes are assigned only after every component was built, and the
    Neo4j driver is closed again if any later construction step raises, so a
    failed attempt leaves the proxy uninitialised and does not leak a driver.

    Raises:
        Exception: whatever the OpenAI/Neo4j/neo4j_graphrag constructors or
            the schema query raise; the proxy can be retried afterwards.
    """
    if self._graphrag is not None:
        return

    # The OpenAI client and embedder are created lazily too (avoids CI crashes).
    rag_llm = OpenAILLM(model_name="gpt-4o-mini", model_params={"temperature": 0})
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    driver = get_neo4j_driver()

    try:
        vector_cypher_retriever = VectorCypherRetriever(
            driver=driver,
            index_name=INDEX_NAME,
            retrieval_query=_retrieval_query,
            embedder=embedder,
        )
        text2cypher_retriever = Text2CypherRetriever(
            driver=driver,
            llm=rag_llm,
            neo4j_schema=_get_schema(driver),
            examples=_examples,
        )
        # NOTE(review): the tool descriptions below appear mis-encoded (UTF-8
        # bytes decoded with a single-byte Greek code page). Literals that
        # were split across physical lines are re-joined so the module
        # parses; the break may have swallowed a character. Restore the text
        # from the original source.
        tools_retriever = ToolsRetriever(
            driver=driver,
            llm=rag_llm,
            tools=[
                vector_cypher_retriever.convert_to_tool(
                    name="vector_retriever",
                    description=(
                        "λ΄μ€ λ³Έλ¬Έ μλ―Έ μ μ¬λ κΈ°λ° κ²μ + μ°κ²°λ μν°ν°(κΈ°μΒ·κΈ°μ Β·μλΉμ€Β·λΆμΌ) κ΄κ³ κ·Έλν νμ. "
                        "νΉμ μ£Όμ /κΈ°μ/κΈ°μ μ λν΄ λ΄μ€ κΈ°μ¬ λ° κ΄λ ¨ κ·Έλν κ΄κ³λ₯Ό ν¨κ» λΆμν λ μ¬μ©. "
                        "μ: 'νλμ°¨ AI λ΄μ€', 'νΉμ κΈ°μ μ μ μ© μ¬λ‘'."
                    ),
                ),
                text2cypher_retriever.convert_to_tool(
                    name="text2cypher_retriever",
                    description=(
                        "μμ°μ΄λ₯Ό Neo4j Cypher μΏΌλ¦¬λ‘ λ³ννμ¬ κ·Έλν ꡬ쑰λ₯Ό μ§κ³Β·νμ. "
                        "'κ°μ₯ λ§μ΄ μΈκΈλ κΈ°μ ', 'νΈλ λ λΆμ', 'νΉμ κΈ°μμ μλΉμ€ λͺ©λ‘', "
                        "'μ΄λ€ κΈ°μμ΄ X κΈ°μ μ κ°λ°νλ', 'μ΅κ·Ό λ΄μ€ μμ½' λ± "
                        "μ§κ³(COUNT/ORDER BY)λ ꡬ쑰μ κ΄κ³ μ§μμ λ°λμ μ¬μ©."
                    ),
                ),
            ],
        )
        hybrid_retriever = HybridFallbackRetriever(
            tools_retriever=tools_retriever,
            fallback_retriever=vector_cypher_retriever,
        )
        pipeline = GraphRAG(
            llm=rag_llm,
            retriever=hybrid_retriever,
            prompt_template=_prompt_template,
        )
    except Exception:
        # Without this, every failed attempt would leave an open driver
        # behind and the next call would open another one.
        driver.close()
        raise

    self._rag_llm = rag_llm
    self._hybrid_retriever = hybrid_retriever
    # Assigned last: a non-None `_graphrag` is what marks initialisation done.
    self._graphrag = pipeline
def _is_context_sufficient(self, query_text: str, history: list, retriever_result: Any) -> bool:
"""κ²μλ 컨ν
μ€νΈκ° μ§λ¬Έ λ° μ΄μ λν νλ¦μ μ€μ§μ μΌλ‘ λμμ΄ λλ κΈμ΅/κΈ°μ λ΄μ€ λ°μ΄ν°μΈμ§ GPT-4o-miniλ‘ νλ¨"""
if retriever_result is None:
return False
if not hasattr(retriever_result, "items") or not retriever_result.items:
return False
total_content = " ".join(
getattr(item, "content", "") for item in retriever_result.items
).strip()
if len(total_content) < 100:
return False
# GPT-4o-mini κΈ°λ° μ§λ₯μ μκ° μ§λ¨ (μ΄μ λν νμ€ν 리 λ° μ§λ¬Έμ λ§₯λ½ κ²°ν© νμ )
try:
assert self._rag_llm is not None
context_snippet = total_content[:800]
# μ΄μ λν νμ€ν 리μ λ§₯λ½ μμ½ μΆμΆ (μ΅κ·Ό 3κ° λ©μμ§)
normalized_history = self._normalize_history(history)
history_summary = "μμ"
if normalized_history:
history_summary = "\n".join(
f"- {msg['role']}: {msg['content'][:150]}"
for msg in normalized_history[-3:]
)
routing_prompt = (
"λΉμ μ κΈμ΅/κΈ°μ νΈλ λ RAG μμ€ν
μ μ§λ₯ν λΌμ°ν°μ
λλ€.\n"
"μ¬μ©μμ [νμ¬ μ§λ¬Έ] λ° [μ΅κ·Ό λν νμ€ν 리]κ° μλ μ 곡λ [κ²μλ λ΄μ€ λ°μ΄ν°]μ μλ―Έμ μΌλ‘ λ°μ νκ² μ°κ΄λμ΄ μκ³ , "
"ν΄λΉ λ°μ΄ν°λ₯Ό κΈ°λ°μΌλ‘ μ§λ¬Έμ μ€μ ꡬ체μ μ΄κ³ μ λ’°ν μ μλ λ΅λ³μ μ 곡ν μ μλμ§ νκ°νμΈμ.\n\n"
"νΉν, νμ¬ μ§λ¬Έμ΄ 'κ·Έκ±°μ λν΄ μ’ λ μ€λͺ
ν΄μ€'λ 'μμμ νμ λ λ€λ¬μ΄μ€'μ κ°μ νμ λνν μ§λ¬ΈμΌ κ²½μ°, "
"[μ΅κ·Ό λν νμ€ν 리]μ λͺ
μλ μ£Όμ κΈμ΅/κΈ°μ νΈλ λ μ£Όμ (μ: μΌμ±μ μ AI, μΉ΄μΉ΄μ€ AI λ±)κ° "
"μλ λ΄μ€ λ°μ΄ν°μ ν΅μ¬ λ΄μ©κ³Ό μΌμΉνλμ§ μ’
ν©μ μΌλ‘ κ³ λ €ν΄μΌ ν©λλ€.\n\n"
"λ§μ½ μ§λ¬Έ λ° λν λ§₯λ½μ΄ μλ λ΄μ€ λ°μ΄ν°μ μ ν 무κ΄ν μΌλ° μμ, μΌμμ μΈ λν, μν, μμ λ± "
"μ§μ κ·Έλν(λ΄μ€ λ°μ΄ν°λ² μ΄μ€)μ μλ μ£Όμ μ μ§λ¬Έμ΄λΌλ©΄ λ°λμ 'NO'λΌκ³ λ΅ν΄μΌ ν©λλ€.\n"
"λ΄μ€ ν©νΈ λ°μ΄ν°λ₯Ό κ²°ν©νμ¬ μ¬λ°λ₯Έ λ΅λ³μ μμ±ν μ μλ λ§₯λ½μ΄λΌλ©΄ 'YES', κ·Έλ μ§ μλ€λ©΄ 'NO'λΌκ³ λ§ λ΅νμΈμ.\n\n"
f"[μ΅κ·Ό λν νμ€ν 리]\n{history_summary}\n\n"
f"[νμ¬ μ§λ¬Έ]\n{query_text}\n\n"
f"[κ²μλ λ΄μ€ λ°μ΄ν°]\n{context_snippet}\n\n"
"νμ (YES λλ NOλ‘λ§ λ΅λ³):"
)
# μμ£Ό λΉ λ₯΄κ³ μ λ ΄ν λ¨μΌ ν ν° YES/NO μλ΅ μμ±
response = self._rag_llm.invoke(
input=routing_prompt,
model_params={"temperature": 0, "max_tokens": 5}
)
decision = str(response.content).strip().upper()
return "YES" in decision
except Exception:
# μμΈ λ°μ μ μμ μ μν΄ κΈ°μ‘΄μ κΈ°λ³Έ κΈΈμ΄ κΈ°λ° νμ μΌλ‘ ν΄λ°±
return len(total_content) >= 100
def _normalize_history(self, history: list) -> list:
"""Gradio νμ€ν 리(dict λλ tuple νμ)λ₯Ό LLM message_history νμμΌλ‘ μ κ·ν"""
normalized: list = []
for msg in history:
if isinstance(msg, dict) and "role" in msg and "content" in msg:
normalized.append({"role": msg["role"], "content": str(msg["content"])})
elif isinstance(msg, (list, tuple)) and len(msg) == 2:
if msg[0]:
normalized.append({"role": "user", "content": str(msg[0])})
if msg[1]:
normalized.append({"role": "assistant", "content": str(msg[1])})
return normalized
def _generate_general_answer(self, query_text: str, history: list) -> str:
"""κ·Έλν κ²μ κ²°κ³Ό μμ΄ GPT-4o-mini μΌλ° μ§μμΌλ‘ λ΅λ³ μμ± (λν νμ€ν 리 λ°μ)"""
assert self._rag_llm is not None
system_prompt = (
"λΉμ μ AI λ° νν
ν¬ κΈ°μ νΈλ λ μ λ¬Έκ°μ΄μ, μ·¨μ
μ€λΉμμ μλ λΆμμ λλ μ λ΅ μ»¨μ€ν΄νΈμ
λλ€.\n"
"νμ¬ FinGraph μ§μ κ·Έλν(Neo4j GraphRAG)μμ κ΄λ ¨ λ΄μ€ κΈ°μ¬λ₯Ό μ°Ύμ§ λͺ»νμ΅λλ€.\n"
"μ΄μ λν λ§₯λ½μ μΆ©λΆν λ°μνκ³ , GPT-4o-miniμ μΌλ° νμ΅ λ°μ΄ν°μ κΈ°λ°νμ¬ μ΅μ μ λ€ν΄ μ λ¬Έμ μΌλ‘ λ΅λ³ν΄ μ£ΌμΈμ.\n\n"
"[μ€μ μ§μΉ¨]\n"
"- μ€μ μ‘΄μ¬νμ§ μλ λ΄μ€ λ§ν¬, λ μ§, κ°μ§ URLμ μ λ μμ±νμ§ λ§μΈμ.\n"
"- κ°λ₯νλ€λ©΄ μ·¨μ
μ€λΉμμ΄ λ©΄μ /μμμμ νμ©ν μ μλ μ€μ§μ μΈ μΈμ¬μ΄νΈλ₯Ό ν¬ν¨ν΄ μ£ΌμΈμ.\n"
"- λ΅λ³μ΄ μΌλ° AI νμ΅ λ°μ΄ν° κΈ°λ°μμ μ¨κΈ°μ§ λ§κ³ μμ°μ€λ½κ² μΈκΈνλ©° μμνμΈμ."
)
normalized_history = self._normalize_history(history)
response = self._rag_llm.invoke(
input=query_text,
message_history=normalized_history,
system_instruction=system_prompt,
)
return str(response.content)
def search_with_fallback(self, query_text: str, history: list) -> HybridResult:
    """Retrieve, judge the context, then answer from the graph or from general knowledge.

    Args:
        query_text: the user's question.
        history: previous conversation turns (Gradio format).

    Returns:
        HybridResult with the answer, the mode (``"graph"`` or ``"general"``)
        and, in graph mode, the retriever result reported by GraphRAG.
    """
    self._init_once()
    assert self._hybrid_retriever is not None
    assert self._graphrag is not None

    # Step 1: run retrieval on its own so the context can be judged first.
    retrieved = self._hybrid_retriever.search(query_text=query_text)

    # Step 2: route on the quality of the retrieved context.
    if not self._is_context_sufficient(query_text, history, retrieved):
        # 3b. General-knowledge path: call the LLM directly, with history.
        general_answer = self._generate_general_answer(query_text, history)
        return HybridResult(answer=general_answer, mode="general", retriever_result=None)

    # 3a. Graph path: let GraphRAG produce the briefing-style answer.
    # NOTE(review): GraphRAG was built with the same retriever, so this call
    # presumably performs retrieval a second time - confirm whether the
    # step-1 result could be reused.
    graph_answer = self._graphrag.search(query_text=query_text)
    return HybridResult(
        answer=graph_answer.answer,
        mode="graph",
        retriever_result=graph_answer.retriever_result,
    )
def search(self, *args: Any, **kwargs: Any) -> Any:
self._init_once()
assert self._graphrag is not None
return self._graphrag.search(*args, **kwargs)
def __getattr__(self, name: str) -> Any:
self._init_once()
return getattr(self._graphrag, name)
# app.py imports this object directly; no DB connection is attempted at this point.
graphrag = LazyGraphRAG()
|