FinGraph / app.py
dev-yuje's picture
fix: monkeypatch gradio url_ok check for loopback bypass
5019e1b
"""
app.py — FinNode GraphRAG chatbot
=================================
Entry point for the Hugging Face Spaces deployment.
Conversation flow is controlled with Gradio ChatInterface + LangGraph.
Run:
python app.py
"""
from typing import Any, Dict, List, TypedDict
import dotenv
import gradio.networking
# ──────────────────────────────────────────
# Monkey patch: bypass the loopback reachability check (HF Spaces/Docker)
# ──────────────────────────────────────────
# In some virtualized/Docker environments the self-check that the local URL
# (127.0.0.1:7860) is reachable fails because a proxy or a blocked loopback
# interface gets in the way, which surfaces as a ValueError. Replacing
# ``url_ok`` with a stub that always reports success prevents that.
def _url_always_ok(*args, **kwargs):
    """Stand-in for ``gradio.networking.url_ok``: accept anything, return True."""
    return True


gradio.networking.url_ok = _url_always_ok
import gradio as gr
from langgraph.graph import END, StateGraph
from src.retrieval.finRetrieval import HybridResult, graphrag
from src.utils.ui_templates import CUSTOM_CSS, build_stats_html
dotenv.load_dotenv()


# ──────────────────────────────────────────
# Startup DB self-diagnosis (fail fast)
# ──────────────────────────────────────────
# Run the check as soon as the app server starts (on Hugging Face Spaces or
# anywhere else): if the Neo4j database cannot be reached, startup is aborted
# (crash early) instead of failing later.
def _print_status(message: str, plain_message: str) -> None:
    """Print a status line without ever raising ``UnicodeEncodeError``.

    Args:
        message: Preferred text (may contain emoji).
        plain_message: Same text without emoji, tried when the console
            cannot encode ``message``.

    If the console cannot encode ``plain_message`` either, an ASCII rendering
    with backslash escapes is printed as a last resort.
    """
    for text in (message, plain_message):
        try:
            print(text)
            return
        except UnicodeEncodeError:
            continue
    print(plain_message.encode("ascii", "backslashreplace").decode("ascii"))


try:
    graphrag._init_once()
except Exception as e:
    _print_status(
        f"❌ [μžκ°€ 진단 μ‹€νŒ¨] Neo4j DB μ—°κ²° 확인 쀑 μ—λŸ¬κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e}",
        f"[FAIL] [μžκ°€ 진단 μ‹€νŒ¨] Neo4j DB μ—°κ²° 확인 쀑 μ—λŸ¬κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e}",
    )
    # Bare ``raise`` re-raises the original exception with its traceback intact.
    raise
else:
    # Reported outside the ``try`` so that a console problem while printing
    # the success line is never mistaken for a database failure.
    _print_status(
        "βœ… [μžκ°€ 진단 μ™„λ£Œ] Neo4j AuraDB 지식 κ·Έλž˜ν”„μ— μ™„λ²½ν•˜κ²Œ μ ‘μ†λ˜μ—ˆμŠ΅λ‹ˆλ‹€!",
        "[OK] [μžκ°€ 진단 μ™„λ£Œ] Neo4j AuraDB 지식 κ·Έλž˜ν”„μ— μ™„λ²½ν•˜κ²Œ μ ‘μ†λ˜μ—ˆμŠ΅λ‹ˆλ‹€!",
    )
# ──────────────────────────────────────────
# 1. LangGraph chatbot state definition
# ──────────────────────────────────────────
class ChatState(TypedDict):
    """State dictionary passed between the nodes of the LangGraph chat graph."""

    question: str  # user question
    history: List[dict]  # conversation history [{"role": "user"/"assistant", "content": "..."}]
    context: str  # GraphRAG search result, or the general-knowledge answer
    answer: str  # final answer
    mode: str  # "graph": knowledge-graph based | "general": general-knowledge based
# ──────────────────────────────────────────
# 2. LangGraph node definitions
# ──────────────────────────────────────────
_MAX_NEWS_SOURCES = 3  # number of articles listed in the news feed


def _date_only(value: Any) -> Any:
    """Trim a timestamp such as 2026-05-19T00:00:00Z to the text before "T"."""
    if value and "T" in str(value):
        return str(value).split("T")[0]
    return value


def _add_source(sources: List[dict], seen_urls: set, title: Any, url: Any, date: Any) -> None:
    """Append one article unless it lacks a title/URL or its URL was already added."""
    if title and url and url not in seen_urls:
        seen_urls.add(url)
        sources.append({"title": title, "url": url, "date": _date_only(date)})


def _add_sources_from_db(
    cypher: str,
    parameters: Dict[str, Any],
    sources: List[dict],
    seen_urls: set,
) -> None:
    """Best-effort: run ``cypher`` on Neo4j and add the returned articles to ``sources``.

    The query must return ``title``, ``url`` and ``date`` columns. Any failure
    is logged as a warning and otherwise ignored, because the news feed is an
    optional extra on top of the answer.
    """
    import logging

    try:
        from src.retrieval.finRetrieval import get_neo4j_driver

        driver = get_neo4j_driver()
        with driver.session() as session:
            for record in session.run(cypher, parameters):
                _add_source(sources, seen_urls, record["title"], record["url"], record["date"])
    except Exception as exc:
        logging.getLogger(__name__).warning("Backup news-source lookup failed: %s", exc)


def retrieve_node(state: ChatState) -> ChatState:
    """Node 1: run ``graphrag.search_with_fallback`` and build the answer context.

    * General mode (``hybrid.mode == "general"``): the context is a disclaimer
      banner followed by the answer, and ``mode`` is set to ``"general"``.
    * Otherwise: the context is the answer plus a feed of up to three news
      sources. Sources come from the retriever result metadata; if none are
      found there, from a keyword query on Article nodes; and if that is empty
      too, from the most recent Article nodes.

    Args:
        state: Current chat state; ``question`` and ``history`` are read.

    Returns:
        A copy of ``state`` with ``context`` and ``mode`` filled in. On any
        error ``context`` holds a bracketed error message instead. An empty
        or missing ``mode`` is reported as ``"graph"``.
    """
    try:
        hybrid: HybridResult = graphrag.search_with_fallback(
            query_text=state["question"],
            history=state["history"],
        )
        if hybrid.mode == "general":
            # General-knowledge mode: banner + model answer.
            disclaimer = (
                "> ⚠️ **지식 κ·Έλž˜ν”„μ—μ„œ κ΄€λ ¨ λ‰΄μŠ€λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€.**\n"
                "> GPT-4o의 일반 ν•™μŠ΅ 데이터λ₯Ό 기반으둜 λ‹΅λ³€ν•©λ‹ˆλ‹€.\n"
                "> μ΅œμ‹  κ΅­λ‚΄ λ‰΄μŠ€ 기반 정보가 ν•„μš”ν•˜λ‹€λ©΄ μ§ˆλ¬Έμ„ 더 ꡬ체적으둜 μž…λ ₯ν•΄ λ³΄μ„Έμš”.\n\n"
                "---\n\n"
            )
            return {**state, "context": disclaimer + hybrid.answer, "mode": "general"}

        # Graph mode: answer text plus a news feed of source articles.
        context = hybrid.answer
        sources: List[dict] = []
        seen_urls: set = set()

        # 1) Top sources from the retriever result metadata.
        retriever_result = hybrid.retriever_result
        if retriever_result and hasattr(retriever_result, "items"):
            for item in retriever_result.items:
                # ``metadata`` may be missing or None; treat both as empty.
                meta = getattr(item, "metadata", None) or {}
                _add_source(
                    sources,
                    seen_urls,
                    meta.get("article_title"),
                    meta.get("article_url"),
                    meta.get("article_date"),
                )
                if len(sources) >= _MAX_NEWS_SOURCES:
                    break

        returned_columns = (
            "RETURN a.title as title, a.url as url, a.published_date as date "
            f"ORDER BY a.published_date DESC LIMIT {_MAX_NEWS_SOURCES}"
        )

        # 2) Backup: simple keyword match on Article nodes. The user's words
        #    are passed as a query parameter, never spliced into the Cypher
        #    text, so quotes in the question cannot alter the query.
        if not sources:
            keywords = [w for w in state["question"].split() if len(w) > 1][:3]
            cypher = "MATCH (a:Article) "
            if keywords:
                cypher += (
                    "WHERE any(kw IN $keywords WHERE "
                    "a.title CONTAINS kw OR a.description CONTAINS kw) "
                )
            _add_sources_from_db(
                cypher + returned_columns,
                {"keywords": keywords} if keywords else {},
                sources,
                seen_urls,
            )

        # 3) Still empty: show the latest articles rather than invented ones.
        if not sources:
            _add_sources_from_db("MATCH (a:Article) " + returned_columns, {}, sources, seen_urls)

        # Append the feed once; skip if the answer already contains one.
        if sources and "κ΄€λ ¨ λ‰΄μŠ€ ν”Όλ“œ" not in context:
            feed_parts = ["\n\nπŸ“° **κ΄€λ ¨ λ‰΄μŠ€ ν”Όλ“œ (μ‹€μ‹œκ°„ 뢄석 좜처)**\n"]
            for src in sources:
                date_str = f" ({src['date']})" if src["date"] else ""
                feed_parts.append(f"- πŸ”— [{src['title']}]({src['url']}){date_str}\n")
            context += "".join(feed_parts)
    except Exception as e:
        context = f"[검색 였λ₯˜: {e}]"
    # ``state`` usually carries mode == "" here, so a plain .get() default
    # would never apply; ``or`` makes the documented "graph" default effective.
    return {**state, "context": context, "mode": state.get("mode") or "graph"}
def generate_node(state: ChatState) -> ChatState:
    """Node 2: publish the retrieved context as the final answer.

    ``retrieve_node`` stores the finished text in ``context`` for both the
    graph-based and the general-knowledge mode, so it is passed through
    unchanged. An empty context is replaced by a fixed fallback message.
    """
    final_text = state["context"] or "κ΄€λ ¨ 정보λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
    updated = dict(state)
    updated["answer"] = final_text
    return updated
# ──────────────────────────────────────────
# 3. LangGraph workflow compilation
# ──────────────────────────────────────────
# Linear pipeline: retrieve -> generate -> END.
builder = StateGraph(ChatState)
for _node_name, _node_fn in (("retrieve", retrieve_node), ("generate", generate_node)):
    builder.add_node(_node_name, _node_fn)
builder.set_entry_point("retrieve")
for _edge_start, _edge_end in (("retrieve", "generate"), ("generate", END)):
    builder.add_edge(_edge_start, _edge_end)
chat_graph = builder.compile()
# ──────────────────────────────────────────
# 4. Gradio integration functions
# ──────────────────────────────────────────
def chat(message: str, history: list):
    """Generator invoked by Gradio ChatInterface for every user message.

    Args:
        message: Text entered by the user.
        history: Conversation history managed by Gradio, in
            [{"role": "user"/"assistant", "content": "..."}] format.

    Yields:
        str: Status messages while the graph runs, then the final answer
        (or an error message if processing raises).
    """
    # Blank input: ask for a question and stop.
    if not message.strip():
        yield "μ§ˆλ¬Έμ„ μž…λ ₯ν•΄ μ£Όμ„Έμš”."
        return

    # Initial LangGraph state built from the Gradio inputs.
    initial_state = dict(question=message, history=history, context="", answer="", mode="")

    yield "πŸ” μ‹€μ‹œκ°„ 지식 κ·Έλž˜ν”„μ—μ„œ κ΄€λ ¨ λ‰΄μŠ€λ₯Ό κ²€μƒ‰ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."
    try:
        # stream() emits one event per executed node, keyed by node name.
        for step in chat_graph.stream(initial_state):
            if "retrieve" in step:
                if step["retrieve"].get("mode", "graph") == "general":
                    yield "🌐 κ΄€λ ¨ λ‰΄μŠ€ μ—†μŒ β€” GPT-4o 일반 μ§€μ‹μœΌλ‘œ 닡변을 μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."
                else:
                    yield "πŸ’‘ 검색 μ™„λ£Œ! 뢄석 κ²°κ³Όλ₯Ό λ°”νƒ•μœΌλ‘œ μ΅œμ’… 닡변을 μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."
                continue
            if "generate" in step:
                yield step["generate"]["answer"]
    except Exception as err:
        yield f"⚠️ 챗봇 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {err}"
def get_db_stats() -> Dict[str, Any]:
    """Collect knowledge-graph statistics from Neo4j for the dashboard (best effort).

    Returns:
        Dict[str, Any]: always contains every key below; values that could not
        be fetched keep their zero/empty default.

        * ``articles`` / ``companies`` / ``technologies``: node counts.
        * ``techs_list``: up to 8 ``{"name", "desc"}`` entries.
        * ``companies_list``: up to 5 ``{"name"}`` entries, ordered by the
          number of articles mentioning the company (descending).
        * ``recent_articles``: up to 4 ``{"title", "date", "url"}`` entries,
          newest first.
    """
    stats: Dict[str, Any] = {
        "articles": 0,
        "companies": 0,
        "technologies": 0,
        "techs_list": [],
        # Present from the start so consumers can rely on the key even when
        # the database is unreachable.
        "companies_list": [],
        "recent_articles": [],
    }
    count_queries = {
        "articles": "MATCH (a:Article) RETURN count(a) as cnt",
        "companies": "MATCH (c:AICompany) RETURN count(c) as cnt",
        "technologies": "MATCH (t:AITechnology) RETURN count(t) as cnt",
    }
    try:
        from src.retrieval.finRetrieval import get_neo4j_driver

        driver = get_neo4j_driver()
        with driver.session() as session:
            # 1. Node counts per label.
            for key, query in count_queries.items():
                record = session.run(query).single()
                if record:
                    stats[key] = record["cnt"]

            # 2. Technology names and descriptions (first 8).
            res_tech_list = session.run(
                "MATCH (t:AITechnology) "
                "RETURN t.name as name, COALESCE(t.description, 'AI ν˜μ‹  기술 인프라') as desc LIMIT 8"
            )
            stats["techs_list"] = [{"name": r["name"], "desc": r["desc"]} for r in res_tech_list]

            # 2.5 Most-mentioned companies (top 5).
            res_comp_list = session.run(
                "MATCH (c:AICompany) "
                "OPTIONAL MATCH (a:Article)-[:MENTIONS]->(c) "
                "RETURN c.name as name, count(a) as cnt "
                "ORDER BY cnt DESC LIMIT 5"
            )
            stats["companies_list"] = [{"name": r["name"]} for r in res_comp_list]

            # 3. Most recent articles (latest 4).
            res_art_list = session.run(
                "MATCH (a:Article) "
                "RETURN a.title as title, a.published_date as date, a.url as url "
                "ORDER BY a.published_date DESC LIMIT 4"
            )
            stats["recent_articles"] = [
                {"title": r["title"], "date": r["date"], "url": r["url"]}
                for r in res_art_list
            ]
    except Exception as e:
        message = f"⚠️ [톡계 쑰회 μ‹€νŒ¨] Neo4j 톡계λ₯Ό κ°€μ Έμ˜€λŠ” 데 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€: {e}"
        try:
            print(message)
        except UnicodeEncodeError:
            # Console cannot encode the message; fall back to an ASCII
            # rendering so that reporting the failure never raises.
            print(message.encode("ascii", "backslashreplace").decode("ascii"))
    return stats
# ──────────────────────────────────────────
# 5. Gradio UI composition
# ──────────────────────────────────────────
# Detect the installed Gradio major version at runtime so the theme wiring
# can branch on it (guards against crashes between local 6.x and remote 4.x).
try:
    gradio_major = int(gr.__version__.partition(".")[0])
except Exception:
    gradio_major = 4  # fallback default
theme_obj = gr.themes.Soft(
    primary_hue="sky",
    secondary_hue="slate",
    font=["Pretendard", "-apple-system", "BlinkMacSystemFont", "system-ui", "sans-serif"],
)
# HTML snippet passed to gr.Chatbot(placeholder=...) when the interface
# arguments are assembled: a heading, three feature bullets and a hint line.
CHATBOT_DESCRIPTION = """
<div class="prose">
<h3>🌌 AI 기반 금육/ν•€ν…Œν¬ ν˜μ‹  νŠΈλ Œλ“œλ₯Ό λΆ„μ„ν•˜λŠ” 지식 κ·Έλž˜ν”„(GraphRAG)에 μ§ˆλ¬Έν•˜μ„Έμš”.</h3>
<ul>
<li>πŸ“° <b>κΈˆμœ΅μ‚¬/ν•€ν…Œν¬ AI 동ν–₯</b> β€” μ‹ ν•œμ€ν–‰, 카카였페이, ν† μŠ€λ±…ν¬, λ„€μ΄λ²„νŽ˜μ΄ λ“±μ˜ μ΅œμ‹  금육 AI νŠΈλ Œλ“œ</li>
<li>πŸ”¬ <b>ν•€ν…Œν¬ 핡심 기술 뢄석</b> β€” λ‘œλ³΄μ–΄λ“œλ°”μ΄μ €, λŒ€μ•ˆμ‹ μš©ν‰κ°€, AI FDS, 금육 λ§ˆμ΄λ°μ΄ν„° λ“± 정리</li>
<li>πŸ”— <b>μ‹€μ œ λ‰΄μŠ€ 좜처 제곡</b> β€” λ‹΅λ³€λ§ˆλ‹€ μ‹€μ œ λ³΄λ„λœ κ·Όκ±° 기사 및 좜처 URL 포함</li>
</ul>
<p>πŸ‘‡ μ•„λž˜ μ˜ˆμ‹œ 질문 λ²„νŠΌμ„ ν΄λ¦­ν•˜κ±°λ‚˜ 직접 μž…λ ₯ν•΄ λ³΄μ„Έμš”.</p>
</div>
"""
# Keyword arguments for gr.ChatInterface; a filtered copy of this dict is
# expanded into gr.ChatInterface(...) inside the Blocks layout.
interface_kwargs: Dict[str, Any] = {
    # Generator that yields status messages and then the final answer.
    "fn": chat,
    # Chat window; CHATBOT_DESCRIPTION is used as its placeholder.
    "chatbot": gr.Chatbot(height=700, placeholder=CHATBOT_DESCRIPTION),
    # Input box with a custom placeholder and submit-button label.
    "textbox": gr.Textbox(
        placeholder="λΆ„μ„ν•˜κ³  싢은 λ‚΄μš©μ„ μžμ—°μ–΄λ‘œ μž…λ ₯ν•΄μ£Όμ„Έμš”...",
        container=False,
        scale=7,
        submit_btn="전솑",
    ),
    # Example questions offered by the interface.
    "examples": [
        "μ‹ ν•œμ€ν–‰μ˜ 'μ‹ ν•œ AI 쏠 포트폴리였' λ‘œλ³΄μ–΄λ“œλ°”μ΄μ € 기술과 개인 λ§žμΆ€ν˜• μ„œλΉ„μŠ€μ˜ νŠΉμ§•μ„ μ„€λͺ…ν•΄μ€˜",
        "μΉ΄μΉ΄μ˜€νŽ˜μ΄κ°€ μ”¬νŒŒμΌλŸ¬λ₯Ό μœ„ν•΄ κ°œλ°œν•œ 'AI λŒ€μ•ˆμ‹ μš©ν‰κ°€' λͺ¨λΈμ˜ μž₯점과 λŒ€μΆœ 승인 νš¨κ³ΌλŠ” λ¬΄μ—‡μΈκ°€μš”?",
        "ν† μŠ€λ±…ν¬μ˜ μ‹€μ‹œκ°„ λ³΄μ΄μŠ€ν”Όμ‹± 탐지 기술인 'ν† μŠ€ AI FDS'의 μž‘λ™ 원리와 μ°¨λ‹¨μœ¨μ„ μ•Œλ €μ€˜",
        "λ„€μ΄λ²„νŽ˜μ΄κ°€ μΆœμ‹œν•œ 'AI 금육 λΉ„μ„œ'κ°€ λ§ˆμ΄λ°μ΄ν„°μ™€ κ²°ν•©ν•˜μ—¬ μ œκ³΅ν•˜λŠ” 맞좀 μžμ‚° κ°€μ΄λ“œλŠ” μ–΄λ–€ κ²ƒμΈκ°€μš”?",
    ],
    "cache_examples": False,
}
# Launch arguments: bind to all interfaces on port 7860 so the app starts the
# same way locally and inside the HF Spaces container.
launch_kwargs: Dict[str, Any] = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
}

# Theme/CSS injection depends on the Gradio major version:
#   < 6  -> gr.Blocks(theme=..., css=...). launch() in 4.x/5.x accepts neither,
#           so they must not be added to launch_kwargs there (TypeError).
#   >= 6 -> launch(theme=..., css=...), where Gradio 6 moved these options.
blocks_kwargs: Dict[str, Any] = {}
if gradio_major < 6:
    blocks_kwargs["theme"] = theme_obj
    blocks_kwargs["css"] = CUSTOM_CSS
    if gradio_major < 5:
        # Kept for backward compatibility with direct
        # gr.ChatInterface(**interface_kwargs) use.
        interface_kwargs["theme"] = theme_obj
else:
    launch_kwargs["theme"] = theme_obj
    launch_kwargs["css"] = CUSTOM_CSS
# Two-column dashboard layout built with Blocks.
with gr.Blocks(**blocks_kwargs) as demo:
    # 1. Global navigation bar (GNB) across the top.
    gr.HTML("""
<div style="display: flex; justify-content: space-between; align-items: center; padding: 10px 20px; border-bottom: 1px solid rgba(196, 195, 236, 0.45); background-color: rgba(255, 255, 255, 0.65); backdrop-filter: blur(12px); margin: -20px -20px 6px -20px;">
<div style="font-size: 20px; font-weight: 900; color: #0f172a; display: flex; align-items: center; gap: 12px;">
πŸ“ˆ FinGraph <span style="font-size: 14px; font-weight: 700; color: #475569;">GraphRAG Enhanced AI Terminal</span>
</div>
</div>
""")
    with gr.Row():
        # 2. Left column: sidebar with the stats dashboard (scale=3 of the 3:7 split).
        with gr.Column(scale=3, min_width=320):
            stats_data = get_db_stats()
            stats_html = build_stats_html(stats_data)
            gr.HTML(stats_html)
        # 3. Right column: main chatbot area (scale=7 of the 3:7 split).
        with gr.Column(scale=7, min_width=500, elem_id="chat-column"):
            # ChatInterface without title/description/theme: drop those keys
            # from a copy, leaving interface_kwargs itself untouched.
            chatbot_interface_kwargs: Dict[str, Any] = {
                key: value
                for key, value in interface_kwargs.items()
                if key not in ("title", "description", "theme")
            }
            gr.ChatInterface(**chatbot_interface_kwargs)  # type: ignore
# Start the Gradio server only when this file is executed as a script
# (python app.py), using the launch arguments assembled above.
if __name__ == "__main__":
    demo.launch(**launch_kwargs)