FinGraph / src /retrieval /finRetrieval.py
dev-yuje's picture
feat: complete finance graph integration and fix isolation
47e7138
"""
finRetrieval.py — GraphRAG retrieval module
===========================================
Imported by app.py and wired into the Gradio chatbot.

Usage:
    from src.retrieval.finRetrieval import graphrag
    response = graphrag.search(query_text="삼성전자 AI 서비스는?")  # "What are Samsung Electronics' AI services?"
    print(response.answer)
"""
import logging
import os
from dataclasses import dataclass
from typing import Any
# Suppress Neo4j DBMS server notification logging (e.g. deprecated vector queryNodes warnings).
logging.getLogger("neo4j").setLevel(logging.ERROR)
logging.getLogger("neo4j.notifications").setLevel(logging.ERROR)
import dotenv
import neo4j
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.generation import GraphRAG, RagTemplate
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.retrievers import (
Text2CypherRetriever,
ToolsRetriever,
VectorCypherRetriever,
)
dotenv.load_dotenv()
@dataclass
class HybridResult:
    """Unified response produced by either the GraphRAG path or the general-knowledge path."""

    # Final answer text.
    answer: str
    # "graph": answer grounded in graph retrieval | "general": GPT-4o-mini general-knowledge answer.
    mode: str
    # RetrieverResult; only meaningful when mode == "graph".
    retriever_result: Any = None
def get_neo4j_driver() -> neo4j.Driver:
    """Create and verify a Neo4j driver from environment variables.

    Connection settings are read from the environment:

    * ``NEO4J_URI`` (default ``neo4j://localhost:7687``)
    * ``NEO4J_CLIENT_ID`` / ``NEO4J_CLIENT_SECRET`` - tried first when both are set
    * ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` - used otherwise, or as a fallback
      when the client-credential attempt fails (defaults ``neo4j`` / ``password``)

    Returns:
        A driver whose connectivity has been verified.

    Raises:
        Exception: whatever the neo4j driver raises when the username/password
            connection cannot be created or verified.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        driver = None
        try:
            driver = neo4j.GraphDatabase.driver(uri, auth=(client_id, client_secret))
            driver.verify_connectivity()
            return driver
        except Exception:
            # Best-effort attempt: release the half-open driver instead of leaking it,
            # record why it failed, then fall back to username/password below.
            if driver is not None:
                driver.close()
            logging.getLogger(__name__).warning(
                "Neo4j client-credential authentication failed; "
                "falling back to username/password.",
                exc_info=True,
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    driver = neo4j.GraphDatabase.driver(uri, auth=(username, password))
    try:
        driver.verify_connectivity()
    except Exception:
        # Do not leak the connection pool when the server is unreachable.
        driver.close()
        raise
    return driver
# Name of the Neo4j vector index handed to the VectorCypherRetriever.
INDEX_NAME = "content_vector_index"
# ──────────────────────────────────────────
# 2. Retriever constants and configuration
# ──────────────────────────────────────────
# Cypher fragment given to VectorCypherRetriever as `retrieval_query`.
# Starting from the matched chunk (`node`) it walks to the owning Article, then
# optionally to mentioned companies, the technologies/services those companies
# develop, and mentioned fields. It also collects up to three titles and up to
# three URLs of other articles that mention the same company, technology or service.
# NOTE(review): related titles and URLs are collected independently with DISTINCT,
# so the two lists are not guaranteed to line up pairwise - confirm if callers zip them.
_retrieval_query = """
MATCH (node)<-[:HAS_CHUNK]-(article:Article)
OPTIONAL MATCH (article)-[:MENTIONS]->(company:AICompany)
OPTIONAL MATCH (company)-[:DEVELOPS]->(tech:AITechnology)
OPTIONAL MATCH (company)-[:DEVELOPS]->(svc:AIService)
OPTIONAL MATCH (article)-[:MENTIONS]->(field:AIField)
// ๋™์ผ ๊ธฐ์—…/๊ธฐ์ˆ /์„œ๋น„์Šค๋ฅผ ์–ธ๊ธ‰ํ•˜๋Š” ๊ด€๋ จ ๊ธฐ์‚ฌ๊นŒ์ง€ ํ™•์žฅ ํƒ์ƒ‰ (ํšก๋‹จ ๊ฒ€์ƒ‰)
OPTIONAL MATCH (related_article:Article)
WHERE related_article <> article
AND (
EXISTS { (related_article)-[:MENTIONS]->(:AICompany)<-[:MENTIONS]-(article) }
OR EXISTS { (related_article)-[:MENTIONS]->(:AITechnology)<-[:MENTIONS]-(article) }
OR EXISTS { (related_article)-[:MENTIONS]->(:AIService)<-[:MENTIONS]-(article) }
)
WITH
node, article, company, tech, svc, field,
collect(DISTINCT related_article.title)[..3] AS related_titles,
collect(DISTINCT related_article.url)[..3] AS related_urls
RETURN
node.chunk AS chunk,
article.title AS article_title,
article.url AS article_url,
article.published_date AS article_date,
collect(DISTINCT company.name) AS companies,
collect(DISTINCT tech.name) AS technologies,
collect(DISTINCT svc.name) AS services,
collect(DISTINCT field.name) AS fields,
related_titles AS related_article_titles,
related_urls AS related_article_urls
"""
def _get_schema(driver: neo4j.Driver) -> str:
    """Render a plain-text summary of the graph schema.

    Runs two queries on a fresh session: one listing node types with their
    property names, and one listing up to 30 distinct
    (source label, relationship type, target label) triples. Only the first
    label of each node is reported.

    Args:
        driver: Neo4j driver used to open the session.

    Returns:
        A multi-line string with a node section followed by a relationship section.
    """
    node_query = (
        "CALL db.schema.nodeTypeProperties() "
        "YIELD nodeType, propertyName "
        "RETURN nodeType, collect(propertyName) as props"
    )
    rel_query = (
        "MATCH (n)-[r]->(m) RETURN DISTINCT labels(n)[0] as src, type(r) as rel, labels(m)[0] as tgt LIMIT 30"
    )

    with driver.session() as session:
        node_rows = session.run(node_query).data()
        rel_rows = session.run(rel_query).data()

    # Assemble the report piecewise and join once at the end.
    pieces = ["=== Neo4j Schema ===\n๋…ธ๋“œ:\n"]
    pieces.extend(f"- {row['nodeType']}: {row['props']}\n" for row in node_rows)
    pieces.append("\n๊ด€๊ณ„:\n")
    pieces.extend(
        f"- ({row['src']})-[:{row['rel']}]->({row['tgt']})\n" for row in rel_rows
    )
    return "".join(pieces)
# Few-shot examples handed to Text2CypherRetriever (`examples=_examples`).
# Each entry pairs a natural-language question ("USER INPUT:") with the Cypher
# that should answer it ("CYPHER QUERY:"). The entries are prompt text consumed
# at runtime, so their wording and whitespace must not be edited casually.
_examples = [
"""USER INPUT: ์นด์นด์˜คํŽ˜์ด์˜ AI ์„œ๋น„์Šค ๋ชฉ๋ก์„ ์•Œ๋ ค์ฃผ์„ธ์š”
CYPHER QUERY:
MATCH (c:AICompany {name:"์นด์นด์˜คํŽ˜์ด"})-[:DEVELOPS]->(s:AIService)
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
RETURN s.name AS name, s.description AS description, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: ์‹ ํ•œ์€ํ–‰์ด ๊ฐœ๋ฐœ ์ค‘์ธ AI ๊ธฐ์ˆ ์€?
CYPHER QUERY:
MATCH (c:AICompany {name:"์‹ ํ•œ์€ํ–‰"})-[:DEVELOPS]->(t:AITechnology)
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN t.name AS name, t.description AS description, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: ์–ด๋–ค ๊ธˆ์œต์‚ฌ๊ฐ€ ๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ € ๊ธฐ์ˆ ์„ ๊ฐœ๋ฐœํ•˜๋‚˜์š”?
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(t:AITechnology)
WHERE t.name CONTAINS "๋กœ๋ณด์–ด๋“œ๋ฐ”์ด์ €" OR t.name CONTAINS "์•Œ๊ณ ๋ฆฌ์ฆ˜"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN c.name AS company_name, t.name AS tech_name, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: ๊ธˆ์œต์ด๋‚˜ ํ•€ํ…Œํฌ ๋ถ„์•ผ์— ๊ธฐ์ˆ ์„ ์ ์šฉํ•˜๊ณ  ์žˆ๋Š” ๊ธฐ์—…๋“ค์€ ์–ด๋””์•ผ?
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(t)-[:USED_IN]->(f:AIField)
WHERE f.name CONTAINS "๊ธˆ์œต" OR f.name CONTAINS "ํ•€ํ…Œํฌ"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(t)
RETURN DISTINCT c.name AS company_name, t.name AS tech_name, f.name AS field_name, a.title AS article_title, a.url AS article_url""",
"""USER INPUT: ๊ธˆ์œตAI ๋ถ„์•ผ์— ๊ฐ€์žฅ ์ ๊ทน์ ์ธ ๊ธฐ์—… TOP 3์™€ ๋Œ€ํ‘œ ์„œ๋น„์Šค
CYPHER QUERY:
MATCH (c:AICompany)-[:DEVELOPS]->(s)-[:USED_IN]->(f:AIField)
WHERE f.name CONTAINS "๊ธˆ์œต" OR f.name CONTAINS "ํ•€ํ…Œํฌ"
OPTIONAL MATCH (a:Article)-[:MENTIONS]->(s)
RETURN DISTINCT c.name AS company_name, s.name AS service_name, f.name AS field_name, a.title AS article_title, a.url AS article_url
LIMIT 3""",
"""USER INPUT: ์ตœ๊ทผ ๊ธˆ์œต AI ๊ด€๋ จ ๋‰ด์Šค ๊ธฐ์‚ฌ๋ฅผ ์š”์•ฝํ•ด์ค˜
CYPHER QUERY:
MATCH (a:Article)-[:HAS_CHUNK]->(c:Content)
RETURN a.title AS title, a.url AS url, a.published_date AS published_date, c.chunk AS chunk
ORDER BY a.published_date DESC
LIMIT 3""",
"""USER INPUT: ์ตœ๊ทผ ๊ฐ€์žฅ ๊ด€์‹ฌ์ด ๋†’์€ ๊ธˆ์œต AI ๊ธฐ์ˆ ์ด ๋ญ์•ผ?
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..3] AS article_titles, collect(DISTINCT a.url)[..3] AS article_urls
ORDER BY article_count DESC
RETURN t.name AS tech_name, t.description AS description, article_count, companies, article_titles, article_urls
LIMIT 5""",
"""USER INPUT: ๊ธˆ์œต AI ๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ๋ฅผ ๋ถ„์„ํ•ด์ค˜
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (c:AICompany)-[:DEVELOPS]->(t)
WITH t, count(DISTINCT a) AS article_count, collect(DISTINCT c.name)[..3] AS companies, collect(DISTINCT a.title)[..2] AS article_titles, collect(DISTINCT a.url)[..2] AS article_urls
ORDER BY article_count DESC
RETURN t.name AS tech_name, article_count, companies, article_titles, article_urls
LIMIT 5""",
"""USER INPUT: ํ† ์Šค ๋˜๋Š” ์นด์นด์˜คํŽ˜์ด ๊ด€๋ จ ๊ธˆ์œต AI ๋‰ด์Šค ์•Œ๋ ค์ค˜
CYPHER QUERY:
MATCH (a:Article)-[:MENTIONS]->(c:AICompany)
WHERE c.name CONTAINS 'ํ† ์Šค' OR c.name CONTAINS '์นด์นด์˜คํŽ˜์ด'
OPTIONAL MATCH (a)-[:MENTIONS]->(t:AITechnology)
OPTIONAL MATCH (a)-[:MENTIONS]->(s:AIService)
RETURN a.title AS article_title, a.url AS article_url, a.published_date AS article_date,
collect(DISTINCT c.name) AS companies, collect(DISTINCT t.name) AS technologies, collect(DISTINCT s.name) AS services
ORDER BY a.published_date DESC LIMIT 5""",
]
# ──────────────────────────────────────────
# 3. ToolsRetriever + GraphRAG assembly
# ──────────────────────────────────────────
from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import RawSearchResult, RetrieverResult
class HybridFallbackRetriever(Retriever):
    """Retriever that tries a primary retriever and falls back to a second one.

    ``search`` delegates to ``tools_retriever`` first; when that yields nothing
    (a falsy result or an empty ``items`` list) the same query is sent to
    ``fallback_retriever`` instead.
    """

    VERIFY_NEO4J_VERSION = False

    def __init__(self, tools_retriever: Retriever, fallback_retriever: Retriever) -> None:
        """Store both retrievers and reuse the primary retriever's driver."""
        self.tools_retriever = tools_retriever
        self.fallback_retriever = fallback_retriever
        super().__init__(driver=tools_retriever.driver)

    def get_search_results(self, *args: Any, **kwargs: Any) -> RawSearchResult:
        """Return an empty raw result; ``search`` is overridden and does the real work."""
        return RawSearchResult(records=[])

    def search(self, query_text: str = "", **kwargs: Any) -> RetrieverResult:
        """Search with the primary retriever, falling back when it returns no items.

        Args:
            query_text: Natural-language query.
            **kwargs: Forwarded unchanged to whichever retriever is called.

        Returns:
            The primary result when it contains items, otherwise the fallback result.
        """
        primary = self.tools_retriever.search(query_text=query_text, **kwargs)
        if primary and primary.items:
            return primary
        return self.fallback_retriever.search(query_text=query_text, **kwargs)
class CustomRagTemplate(RagTemplate):
    """RagTemplate variant whose prompt takes only ``context`` and ``query_text``."""

    EXPECTED_INPUTS = ["context", "query_text"]

    def format(self, query_text: str, context: str, examples: str = "") -> str:
        """Fill the template with the query and context, ignoring ``examples``.

        ``examples`` stays in the signature to match the parent class (MyPy);
        it is explicitly discarded so unused-variable checkers such as Vulture
        do not flag it.
        """
        del examples
        return self._format(context=context, query_text=query_text)
# Prompt used by GraphRAG for graph-grounded answers. The template text is sent
# to the LLM verbatim, with `{query_text}` and `{context}` substituted by
# CustomRagTemplate.format; it instructs the model to answer only from the
# supplied context and to follow a fixed markdown briefing layout.
_prompt_template = CustomRagTemplate(
template="""๋‹น์‹ ์€ AI ๋ฐ ํ•€ํ…Œํฌ ๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ ์ „๋ฌธ๊ฐ€์ด์ž, ์ทจ์—… ์ค€๋น„์ƒ์˜ ์—ญ๋Ÿ‰ ๋ถ„์„์„ ๋•๋Š” ์ „๋žต ์ปจ์„คํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
๋ฐ˜๋“œ์‹œ ์•„๋ž˜ ์ œ๊ณต๋œ [์ปจํ…์ŠคํŠธ(Neo4j ์ง€์‹ ๊ทธ๋ž˜ํ”„ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ)]์— ๊ธฐ๋ฐ˜ํ•ด์„œ๋งŒ ๋‹ต๋ณ€ํ•˜๊ณ , ์ปจํ…์ŠคํŠธ์— ๊ทผ๊ฑฐํ•˜์ง€ ์•Š์€ ์‚ฌ์‹ค์„ ์ง€์–ด๋‚ด๊ฑฐ๋‚˜ ๊ฐ€์ƒ์˜ ๋งํฌ(example.com ๋“ฑ)๋ฅผ ์ ˆ๋Œ€ ์ƒ์„ฑํ•˜์ง€ ๋งˆ์„ธ์š”.
๋‹ต๋ณ€์€ ๋Œ€์ค‘์ด๋‚˜ ์ทจ์—… ์ค€๋น„์ƒ์ด ์‹ค์งˆ์ ์œผ๋กœ ํŠธ๋ Œ๋“œ๋ฅผ ๊นŠ์ด ์žˆ๊ฒŒ ํŒŒ์•…ํ•˜๊ณ  ์ž์†Œ์„œ/๋ฉด์ ‘ ๋“ฑ์— ์ฆ‰๊ฐ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋„๋ก, ์•„๋ž˜์˜ [๊ณ ์ • ๋ธŒ๋ฆฌํ•‘ ๋ณด๊ณ ์„œ ํฌ๋งท]์„ **ํ† ์”จ ํ•˜๋‚˜ ํ‹€๋ฆฌ์ง€ ์•Š๊ณ  ์—„๊ฒฉํžˆ ์ค€์ˆ˜**ํ•˜์—ฌ ๋งค์šฐ ์ฒด๊ณ„์ ์ด๊ณ  ๊น”๋”ํ•œ ๋งˆํฌ๋‹ค์šด ์–‘์‹์œผ๋กœ ์ •์„ฑ์Šค๋Ÿฝ๊ฒŒ ๋ธŒ๋ฆฌํ•‘ํ•ด ์ฃผ์„ธ์š”.
โ˜… [์ค‘์š” - ๊ฐ€๋…์„ฑ ๋ฐ ๊ฐœํ–‰ ๊ทœ์น™]:
๊ฐ ์ฃผ์š” ์„น์…˜(###) ์‚ฌ์ด์—๋Š” ๋ฌด์กฐ๊ฑด ๋นˆ ์ค„์„ 2์ค„ ์ด์ƒ ์ถ”๊ฐ€ํ•˜๊ณ , ๋ชจ๋“  ๊ฐœ๋ณ„ ๋ชฉ๋ก ๊ธฐํ˜ธ(- ๋ฐ **) ํ•ญ๋ชฉ ์‚ฌ์ด์‚ฌ์ด์—๋„ ๋ฐ˜๋“œ์‹œ 1์ค„ ์ด์ƒ์˜ ๋นˆ ์ค„(๊ฐœํ–‰)์„ ์‚ฝ์ž…ํ•˜์—ฌ ์‹œ๊ฐ์  ๊ฐ€๋…์„ฑ์„ ๊ทน๋Œ€ํ™”ํ•ด ์ฃผ์„ธ์š”.
---
# ๐Ÿ“‹ [FinGraph AI ๋ถ„์„ ๋ธŒ๋ฆฌํ•‘]
### 1. ๐Ÿ“Š ํ•œ ์ค„ ์š”์•ฝ & ํ•ต์‹ฌ ํŠธ๋ Œ๋“œ
- **ํ•œ ์ค„ ์š”์•ฝ**: [ํ•ด๋‹น ํŠธ๋ Œ๋“œ์˜ ํ•ต์‹ฌ ์š”์ ์„ ๋‹จ ํ•œ ์ค„๋กœ ๋ช…๋ฃŒํ•˜๊ฒŒ ์š”์•ฝ]
- **์ฃผ์š” ์ธ์‚ฌ์ดํŠธ**: [์ด ์ด์Šˆ๊ฐ€ ํ˜„์žฌ IT/AI ๋ฐ ๊ธˆ์œต ํ•€ํ…Œํฌ ์—…๊ณ„ ์ „์ฒด์— ๋˜์ง€๋Š” ํ•ต์‹ฌ ํ™”๋‘ ๊ธฐ์žฌ]
### 2. ๐Ÿ” ์ƒ์„ธ ๋ถ„์„ ๋ฐ ํŒฉํŠธ ์ •๋ฆฌ
[์ปจํ…์ŠคํŠธ์— ๊ธฐ๋ก๋œ ์‹ค์ œ ์‚ฌ์‹ค ๊ด€๊ณ„๋“ค์„ ๊ทผ๊ฑฐ๋กœ ๊ตฌ์ฒด์  ์‚ฌ์‹ค์„ ์ •๋ฆฌ]
- **์ด์Šˆ ์ „๊ฐœ**: [๊ตฌ์ฒด์ ์ธ ์ด์Šˆ ๋ฐœ์ƒ ๋ฐฐ๊ฒฝ ๋ฐ ์ง„ํ–‰ ๊ฒฝ๊ณผ]
- **๊ธฐ์—… ๋™ํ–ฅ**: [๊ด€๋ จ ํ•ต์‹ฌ ๊ธฐ์—…๋“ค์˜ ์‹ค๋ฌผ ๋น„์ฆˆ๋‹ˆ์Šค ์›€์ง์ž„ ๋ฐ ๋Œ€์‘ ํ–‰๋ณด. ์ปจํ…์ŠคํŠธ์— ์—ฌ๋Ÿฌ ๊ธฐ์—…/๊ธฐ์ˆ ์ด ์žˆ๋‹ค๋ฉด ๋ชจ๋‘ ์–ธ๊ธ‰]
- **๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ**: [์ปจํ…์ŠคํŠธ์— ๋“ฑ์žฅํ•˜๋Š” ํ•ต์‹ฌ AI ๊ธฐ์ˆ ๋“ค์„ ๋น„๊ต/๋ถ„๋ฅ˜ํ•˜์—ฌ ์ „์ฒด ํŠธ๋ Œ๋“œ ํ๋ฆ„ ๋ถ„์„]
- **์ธํ”„๋ผ/์‚ฌํšŒ์  ์š”์ธ**: [์ „๋ ฅ๋ง ๋ถ€์กฑ, ๋Œ€์ค‘์  ๋ถˆ์•ˆ๊ฐ, ํ•˜๋“œ์›จ์–ด์  ์ œ์•ฝ ์‚ฌํ•ญ ๋“ฑ ํ•ต์‹ฌ ์š”์ธ]
### 3. ๐Ÿ’ก ์ทจ์—…/์ž์†Œ์„œ/๋ฉด์ ‘ ์‹ค์ „ ๊ฐ€์ด๋“œ
[์ง€์›์ž๊ฐ€ ๋ฉด์ ‘์ด๋‚˜ ์ž๊ธฐ์†Œ๊ฐœ์„œ์—์„œ ์ฐจ๋ณ„ํ™”๋œ ํ†ต์ฐฐ์„ ๋ณด์—ฌ์ค„ ์ˆ˜ ์žˆ๋Š” ๋ฐฉ๋ฒ• ์ œ์‹œ]
- **๊ธˆ์œต/IT ์—…๊ณ„ ์‹œ์‚ฌ์ **: [๊ฑฐ์‹œ์ ์ธ ํŒŒ๊ธ‰ํšจ๊ณผ์™€ ์ง€์†๊ฐ€๋Šฅ์„ฑ ๊ด€์  ์ œ์‹œ]
- **์‹ค์ „ ์ž์†Œ์„œ/๋ฉด์ ‘ ํ™œ์šฉ Tip**: [์ง€์›๋™๊ธฐ๋‚˜ ์—ญ๋Ÿ‰ ๊ธฐ์ˆ ์„œ ์ž‘์„ฑ ์‹œ ๋ณธ์ธ์˜ ์—ญ๋Ÿ‰๊ณผ ์–ด๋–ป๊ฒŒ ์—ฐ๊ณ„ํ•˜์—ฌ ํ’€์–ด๋‚ผ์ง€์— ๋Œ€ํ•œ ๋งž์ถค ๊ฐ€์ด๋“œ]
### ๐Ÿ“ฐ 4. ๊ทผ๊ฑฐ ๋‰ด์Šค ์ถœ์ฒ˜ (GraphRAG ๊ฒ€์ƒ‰ ๊ธฐ์‚ฌ)
> ์ปจํ…์ŠคํŠธ์— ์‹ค์ œ๋กœ ์กด์žฌํ•˜๋Š” ๊ธฐ์‚ฌ URL๋งŒ ๊ธฐ์žฌํ•˜๊ณ , ์กด์žฌํ•˜์ง€ ์•Š๋Š” ๊ธฐ์‚ฌ๋Š” ์ ˆ๋Œ€ ์ง€์–ด๋‚ด์ง€ ๋งˆ์„ธ์š”.
> ๊ฒ€์ƒ‰๋œ ๊ธฐ์‚ฌ๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ์•„๋ž˜ ํ˜•์‹์œผ๋กœ ์—ด๊ฑฐํ•˜๊ณ , ์—†์œผ๋ฉด ์ด ์„น์…˜์„ ์ƒ๋žตํ•˜์„ธ์š”.
>
> ์˜ˆ์‹œ:
> - *[๊ธฐ์‚ฌ ์ œ๋ชฉ](๊ธฐ์‚ฌ URL)* โ€” ๋ณด๋„์ผ์ž
---
์งˆ๋ฌธ: {query_text}
[์ปจํ…์ŠคํŠธ]
{context}
๋‹ต๋ณ€:""",
expected_inputs=["context", "query_text"]
)
class LazyGraphRAG:
    """Lazy proxy around ``GraphRAG``.

    Nothing is connected at import/construction time; the OpenAI clients, the
    Neo4j driver, the retrievers and the ``GraphRAG`` pipeline are created on
    the first call that actually needs them. Public attributes that are not
    defined here are forwarded to the underlying ``GraphRAG`` instance.
    """

    def __init__(self) -> None:
        self._graphrag: Any = None
        self._hybrid_retriever: Any = None  # retriever exposed for direct access (quality evaluation)
        self._rag_llm: Any = None  # LLM also used for routing and general-knowledge answers

    def _init_once(self) -> None:
        """Build the LLM, retrievers and GraphRAG pipeline on first use.

        Subsequent calls are no-ops. State is only published on ``self`` once
        every component was built, so a failed attempt can simply be retried.

        Raises:
            Exception: anything raised while connecting or assembling; the
                freshly opened driver is closed before re-raising.
        """
        if self._graphrag is not None:
            return
        # OpenAI client and embedder are created lazily (keeps imports safe in CI).
        llm = OpenAILLM(model_name="gpt-4o-mini", model_params={"temperature": 0})
        embedder = OpenAIEmbeddings(model="text-embedding-3-small")
        driver = get_neo4j_driver()
        try:
            vector_cypher_retriever = VectorCypherRetriever(
                driver=driver,
                index_name=INDEX_NAME,
                retrieval_query=_retrieval_query,
                embedder=embedder,
            )
            text2cypher_retriever = Text2CypherRetriever(
                driver=driver,
                llm=llm,
                neo4j_schema=_get_schema(driver),
                examples=_examples,
            )
            tools_retriever = ToolsRetriever(
                driver=driver,
                llm=llm,
                tools=[
                    vector_cypher_retriever.convert_to_tool(
                        name="vector_retriever",
                        description=(
                            "๋‰ด์Šค ๋ณธ๋ฌธ ์˜๋ฏธ ์œ ์‚ฌ๋„ ๊ธฐ๋ฐ˜ ๊ฒ€์ƒ‰ + ์—ฐ๊ฒฐ๋œ ์—”ํ‹ฐํ‹ฐ(๊ธฐ์—…ยท๊ธฐ์ˆ ยท์„œ๋น„์Šคยท๋ถ„์•ผ) ๊ด€๊ณ„ ๊ทธ๋ž˜ํ”„ ํƒ์ƒ‰. "
                            "ํŠน์ • ์ฃผ์ œ/๊ธฐ์—…/๊ธฐ์ˆ ์— ๋Œ€ํ•ด ๋‰ด์Šค ๊ธฐ์‚ฌ ๋ฐ ๊ด€๋ จ ๊ทธ๋ž˜ํ”„ ๊ด€๊ณ„๋ฅผ ํ•จ๊ป˜ ๋ถ„์„ํ•  ๋•Œ ์‚ฌ์šฉ. "
                            "์˜ˆ: 'ํ˜„๋Œ€์ฐจ AI ๋‰ด์Šค', 'ํŠน์ • ๊ธฐ์ˆ ์˜ ์ ์šฉ ์‚ฌ๋ก€'."
                        ),
                    ),
                    text2cypher_retriever.convert_to_tool(
                        name="text2cypher_retriever",
                        description=(
                            "์ž์—ฐ์–ด๋ฅผ Neo4j Cypher ์ฟผ๋ฆฌ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ๊ทธ๋ž˜ํ”„ ๊ตฌ์กฐ๋ฅผ ์ง‘๊ณ„ยทํƒ์ƒ‰. "
                            "'๊ฐ€์žฅ ๋งŽ์ด ์–ธ๊ธ‰๋œ ๊ธฐ์ˆ ', 'ํŠธ๋ Œ๋“œ ๋ถ„์„', 'ํŠน์ • ๊ธฐ์—…์˜ ์„œ๋น„์Šค ๋ชฉ๋ก', "
                            "'์–ด๋–ค ๊ธฐ์—…์ด X ๊ธฐ์ˆ ์„ ๊ฐœ๋ฐœํ•˜๋‚˜', '์ตœ๊ทผ ๋‰ด์Šค ์š”์•ฝ' ๋“ฑ "
                            "์ง‘๊ณ„(COUNT/ORDER BY)๋‚˜ ๊ตฌ์กฐ์  ๊ด€๊ณ„ ์งˆ์˜์— ๋ฐ˜๋“œ์‹œ ์‚ฌ์šฉ."
                        ),
                    ),
                ],
            )
            hybrid_retriever = HybridFallbackRetriever(
                tools_retriever=tools_retriever,
                fallback_retriever=vector_cypher_retriever,
            )
            pipeline = GraphRAG(
                llm=llm,
                retriever=hybrid_retriever,
                prompt_template=_prompt_template,
            )
        except Exception:
            # Assembly failed after the connection was opened: do not leak the driver.
            driver.close()
            raise
        self._rag_llm = llm
        self._hybrid_retriever = hybrid_retriever
        # Assigned last: this attribute doubles as the "initialized" flag.
        self._graphrag = pipeline

    def _is_context_sufficient(self, query_text: str, history: list, retriever_result: Any) -> bool:
        """Decide whether the retrieved context is good enough to answer from.

        Cheap structural checks come first (result present, has items, at least
        100 characters of combined content). If they pass, the LLM is asked for
        a YES/NO verdict given the question, the last three history messages
        and the first 800 characters of context.

        Args:
            query_text: Current user question.
            history: Previous conversation in Gradio format (may be empty or None).
            retriever_result: Object with an ``items`` list whose elements carry ``content``.

        Returns:
            False when the context is missing/too short or the LLM does not
            answer YES; True when the LLM answers YES or the LLM check itself fails.
        """
        if retriever_result is None:
            return False
        if not hasattr(retriever_result, "items") or not retriever_result.items:
            return False
        # Coerce content to text so non-string or None content cannot break the join.
        contents = (getattr(item, "content", "") for item in retriever_result.items)
        total_content = " ".join(
            "" if content is None else str(content) for content in contents
        ).strip()
        if len(total_content) < 100:
            return False
        # LLM-based self-check combining the question with recent conversation context.
        try:
            assert self._rag_llm is not None
            context_snippet = total_content[:800]
            # Summarize the most recent three history messages for the router prompt.
            normalized_history = self._normalize_history(history)
            history_summary = "์—†์Œ"
            if normalized_history:
                history_summary = "\n".join(
                    f"- {msg['role']}: {msg['content'][:150]}"
                    for msg in normalized_history[-3:]
                )
            routing_prompt = (
                "๋‹น์‹ ์€ ๊ธˆ์œต/๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ RAG ์‹œ์Šคํ…œ์˜ ์ง€๋Šฅํ˜• ๋ผ์šฐํ„ฐ์ž…๋‹ˆ๋‹ค.\n"
                "์‚ฌ์šฉ์ž์˜ [ํ˜„์žฌ ์งˆ๋ฌธ] ๋ฐ [์ตœ๊ทผ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ]๊ฐ€ ์•„๋ž˜ ์ œ๊ณต๋œ [๊ฒ€์ƒ‰๋œ ๋‰ด์Šค ๋ฐ์ดํ„ฐ]์™€ ์˜๋ฏธ์ ์œผ๋กœ ๋ฐ€์ ‘ํ•˜๊ฒŒ ์—ฐ๊ด€๋˜์–ด ์žˆ๊ณ , "
                "ํ•ด๋‹น ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ์‹ค์ œ ๊ตฌ์ฒด์ ์ด๊ณ  ์‹ ๋ขฐํ•  ์ˆ˜ ์žˆ๋Š” ๋‹ต๋ณ€์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋Š”์ง€ ํ‰๊ฐ€ํ•˜์„ธ์š”.\n\n"
                "ํŠนํžˆ, ํ˜„์žฌ ์งˆ๋ฌธ์ด '๊ทธ๊ฑฐ์— ๋Œ€ํ•ด ์ข€ ๋” ์„ค๋ช…ํ•ด์ค˜'๋‚˜ '์ž์†Œ์„œ ํŒ์„ ๋” ๋‹ค๋“ฌ์–ด์ค˜'์™€ ๊ฐ™์€ ํ›„์† ๋Œ€ํ™”ํ˜• ์งˆ๋ฌธ์ผ ๊ฒฝ์šฐ, "
                "[์ตœ๊ทผ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ]์— ๋ช…์‹œ๋œ ์ฃผ์š” ๊ธˆ์œต/๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ ์ฃผ์ œ(์˜ˆ: ์‚ผ์„ฑ์ „์ž AI, ์นด์นด์˜ค AI ๋“ฑ)๊ฐ€ "
                "์•„๋ž˜ ๋‰ด์Šค ๋ฐ์ดํ„ฐ์˜ ํ•ต์‹ฌ ๋‚ด์šฉ๊ณผ ์ผ์น˜ํ•˜๋Š”์ง€ ์ข…ํ•ฉ์ ์œผ๋กœ ๊ณ ๋ คํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.\n\n"
                "๋งŒ์•ฝ ์งˆ๋ฌธ ๋ฐ ๋Œ€ํ™” ๋งฅ๋ฝ์ด ์•„๋ž˜ ๋‰ด์Šค ๋ฐ์ดํ„ฐ์™€ ์ „ํ˜€ ๋ฌด๊ด€ํ•œ ์ผ๋ฐ˜ ์ƒ์‹, ์ผ์ƒ์ ์ธ ๋Œ€ํ™”, ์ˆ˜ํ•™, ์˜ˆ์ˆ  ๋“ฑ "
                "์ง€์‹ ๊ทธ๋ž˜ํ”„(๋‰ด์Šค ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค)์— ์—†๋Š” ์ฃผ์ œ์˜ ์งˆ๋ฌธ์ด๋ผ๋ฉด ๋ฐ˜๋“œ์‹œ 'NO'๋ผ๊ณ  ๋‹ตํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.\n"
                "๋‰ด์Šค ํŒฉํŠธ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฒฐํ•ฉํ•˜์—ฌ ์˜ฌ๋ฐ”๋ฅธ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•  ์ˆ˜ ์žˆ๋Š” ๋งฅ๋ฝ์ด๋ผ๋ฉด 'YES', ๊ทธ๋ ‡์ง€ ์•Š๋‹ค๋ฉด 'NO'๋ผ๊ณ ๋งŒ ๋‹ตํ•˜์„ธ์š”.\n\n"
                f"[์ตœ๊ทผ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ]\n{history_summary}\n\n"
                f"[ํ˜„์žฌ ์งˆ๋ฌธ]\n{query_text}\n\n"
                f"[๊ฒ€์ƒ‰๋œ ๋‰ด์Šค ๋ฐ์ดํ„ฐ]\n{context_snippet}\n\n"
                "ํŒ์ • (YES ๋˜๋Š” NO๋กœ๋งŒ ๋‹ต๋ณ€):"
            )
            # Request a very short YES/NO completion.
            # NOTE(review): whether `invoke` accepts a per-call `model_params` keyword
            # depends on the installed neo4j-graphrag version - confirm. If it does not,
            # this raises TypeError and every call lands in the fallback below.
            response = self._rag_llm.invoke(
                input=routing_prompt,
                model_params={"temperature": 0, "max_tokens": 5}
            )
            decision = str(response.content).strip().upper()
            return "YES" in decision
        except Exception:
            # Fall back to the length-based verdict, which already passed above
            # (len(total_content) >= 100). Log it so a permanently failing router
            # does not go unnoticed.
            logging.getLogger(__name__).warning(
                "LLM context-sufficiency check failed; "
                "falling back to the length-based verdict.",
                exc_info=True,
            )
            return True

    def _normalize_history(self, history: list) -> list:
        """Convert Gradio history into LLM ``message_history`` dictionaries.

        Accepts both message dicts (``{"role": ..., "content": ...}``) and
        ``(user, assistant)`` pairs; empty halves of a pair are skipped and
        anything else is ignored. ``None`` is treated as an empty history.

        Returns:
            A list of ``{"role": str, "content": str}`` dictionaries.
        """
        normalized: list = []
        for msg in history or []:
            if isinstance(msg, dict) and "role" in msg and "content" in msg:
                normalized.append({"role": msg["role"], "content": str(msg["content"])})
            elif isinstance(msg, (list, tuple)) and len(msg) == 2:
                user_text, assistant_text = msg
                if user_text:
                    normalized.append({"role": "user", "content": str(user_text)})
                if assistant_text:
                    normalized.append({"role": "assistant", "content": str(assistant_text)})
        return normalized

    def _generate_general_answer(self, query_text: str, history: list) -> str:
        """Answer from the LLM's general knowledge, without graph context.

        The normalized conversation history is passed along as message history.

        Returns:
            The LLM response content as a string.
        """
        assert self._rag_llm is not None
        system_prompt = (
            "๋‹น์‹ ์€ AI ๋ฐ ํ•€ํ…Œํฌ ๊ธฐ์ˆ  ํŠธ๋ Œ๋“œ ์ „๋ฌธ๊ฐ€์ด์ž, ์ทจ์—… ์ค€๋น„์ƒ์˜ ์—ญ๋Ÿ‰ ๋ถ„์„์„ ๋•๋Š” ์ „๋žต ์ปจ์„คํ„ดํŠธ์ž…๋‹ˆ๋‹ค.\n"
            "ํ˜„์žฌ FinGraph ์ง€์‹ ๊ทธ๋ž˜ํ”„(Neo4j GraphRAG)์—์„œ ๊ด€๋ จ ๋‰ด์Šค ๊ธฐ์‚ฌ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.\n"
            "์ด์ „ ๋Œ€ํ™” ๋งฅ๋ฝ์„ ์ถฉ๋ถ„ํžˆ ๋ฐ˜์˜ํ•˜๊ณ , GPT-4o-mini์˜ ์ผ๋ฐ˜ ํ•™์Šต ๋ฐ์ดํ„ฐ์— ๊ธฐ๋ฐ˜ํ•˜์—ฌ ์ตœ์„ ์„ ๋‹คํ•ด ์ „๋ฌธ์ ์œผ๋กœ ๋‹ต๋ณ€ํ•ด ์ฃผ์„ธ์š”.\n\n"
            "[์ค‘์š” ์ง€์นจ]\n"
            "- ์‹ค์ œ ์กด์žฌํ•˜์ง€ ์•Š๋Š” ๋‰ด์Šค ๋งํฌ, ๋‚ ์งœ, ๊ฐ€์งœ URL์„ ์ ˆ๋Œ€ ์ƒ์„ฑํ•˜์ง€ ๋งˆ์„ธ์š”.\n"
            "- ๊ฐ€๋Šฅํ•˜๋‹ค๋ฉด ์ทจ์—… ์ค€๋น„์ƒ์ด ๋ฉด์ ‘/์ž์†Œ์„œ์— ํ™œ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ์‹ค์งˆ์ ์ธ ์ธ์‚ฌ์ดํŠธ๋ฅผ ํฌํ•จํ•ด ์ฃผ์„ธ์š”.\n"
            "- ๋‹ต๋ณ€์ด ์ผ๋ฐ˜ AI ํ•™์Šต ๋ฐ์ดํ„ฐ ๊ธฐ๋ฐ˜์ž„์„ ์ˆจ๊ธฐ์ง€ ๋ง๊ณ  ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์–ธ๊ธ‰ํ•˜๋ฉฐ ์‹œ์ž‘ํ•˜์„ธ์š”."
        )
        normalized_history = self._normalize_history(history)
        response = self._rag_llm.invoke(
            input=query_text,
            message_history=normalized_history,
            system_instruction=system_prompt,
        )
        return str(response.content)

    def search_with_fallback(self, query_text: str, history: list) -> "HybridResult":
        """Retrieve, judge the context, then answer from the graph or from general knowledge.

        Args:
            query_text: User question text.
            history: Previous conversation in Gradio format.

        Returns:
            HybridResult with the answer, the mode ("graph" or "general") and,
            in graph mode, the retriever result used as evidence.
        """
        self._init_once()
        assert self._hybrid_retriever is not None
        assert self._graphrag is not None
        # Step 1: run retrieval on its own so the context can be judged first.
        retriever_result = self._hybrid_retriever.search(query_text=query_text)
        # Step 2: route on context quality.
        if self._is_context_sufficient(query_text, history, retriever_result):
            # Step 3a: graph-grounded briefing. `return_context=True` asks GraphRAG to
            # attach the retriever result; without it the field may come back empty.
            # NOTE(review): GraphRAG.search performs its own retrieval, so the graph
            # path queries the retriever twice - consider reusing the step-1 result.
            rag_result = self._graphrag.search(query_text=query_text, return_context=True)
            return HybridResult(
                answer=rag_result.answer,
                mode="graph",
                # Fall back to the step-1 result if the pipeline attached none.
                retriever_result=rag_result.retriever_result or retriever_result,
            )
        # Step 3b: general-knowledge answer, with history passed to the LLM directly.
        answer = self._generate_general_answer(query_text, history)
        return HybridResult(answer=answer, mode="general", retriever_result=None)

    def search(self, *args: Any, **kwargs: Any) -> Any:
        """Initialize on first use and delegate to ``GraphRAG.search``."""
        self._init_once()
        assert self._graphrag is not None
        return self._graphrag.search(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Forward unknown public attributes to the underlying GraphRAG instance.

        Private and dunder names are never forwarded: protocol probes such as
        ``hasattr(obj, "__deepcopy__")`` must not open DB/OpenAI connections,
        and looking up ``_graphrag`` on an instance whose ``__init__`` did not
        run must fail cleanly instead of recursing through ``_init_once``.
        """
        if name.startswith("_"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        self._init_once()
        return getattr(self._graphrag, name)
# app.py imports this object directly; constructing it does not attempt a DB connection.
graphrag = LazyGraphRAG()