"""
app.py — FinNode GraphRAG chatbot
=================================
Deployment entry point for Hugging Face Spaces.
Conversation flow control based on Gradio ChatInterface + LangGraph.

Run:
    python app.py
"""
|
|
| from typing import Any, Dict, List, TypedDict |
|
|
| import dotenv |
| import gradio.networking |
|
|
| |
| |
| |
| |
| |
# Replace Gradio's `url_ok` helper with a stub that always returns True.
# NOTE(review): presumably this bypasses Gradio's local-URL reachability probe
# at launch (e.g. inside a container where the probe fails) — confirm intent.
gradio.networking.url_ok = lambda *args, **kwargs: True
|
|
| import gradio as gr |
| from langgraph.graph import END, StateGraph |
|
|
| from src.retrieval.finRetrieval import HybridResult, graphrag |
| from src.utils.ui_templates import CUSTOM_CSS, build_stats_html |
|
|
# Load environment variables (python-dotenv) before anything reads them.
dotenv.load_dotenv()


def _print_with_fallback(preferred: str, fallback: str) -> None:
    """Print *preferred*; if the console cannot encode it, print *fallback*."""
    try:
        print(preferred)
    except UnicodeEncodeError:
        print(fallback)


# Startup self-check: initialise the GraphRAG backend once at import time.
# A failure is reported and then re-raised, so the app does not start without
# a working backend.
try:
    graphrag._init_once()
    _print_with_fallback(
        "β [μκ° μ§λ¨ μλ£] Neo4j AuraDB μ§μ κ·Έλνμ μλ²½νκ² μ μλμμ΅λλ€!",
        "[OK] [μκ° μ§λ¨ μλ£] Neo4j AuraDB μ§μ κ·Έλνμ μλ²½νκ² μ μλμμ΅λλ€!",
    )
except Exception as e:
    _print_with_fallback(
        f"β [μκ° μ§λ¨ μ€ν¨] Neo4j DB μ°κ²° νμΈ μ€ μλ¬κ° λ°μνμ΅λλ€: {e}",
        f"[FAIL] [μκ° μ§λ¨ μ€ν¨] Neo4j DB μ°κ²° νμΈ μ€ μλ¬κ° λ°μνμ΅λλ€: {e}",
    )
    raise e
|
|
| |
| |
| |
|
|
|
|
class ChatState(TypedDict):
    """State dictionary passed between the LangGraph nodes of the chat flow."""

    question: str  # current user message
    history: List[dict]  # earlier conversation turns as handed over by Gradio
    context: str  # text set by retrieve_node (answer, disclaimer, sources or error)
    answer: str  # final reply, set by generate_node
    mode: str  # "general" when the general-knowledge path was taken
|
|
|
|
| |
| |
| |
|
|
|
|
def _normalize_source_date(date):
    """Return *date* with any ISO time part (everything from "T") removed."""
    if date and "T" in str(date):
        return str(date).split("T")[0]
    return date


def _add_source(sources: list, seen_urls: set, title, url, date) -> None:
    """Append one source entry unless title/URL is missing or the URL is a duplicate."""
    if not (title and url) or url in seen_urls:
        return
    seen_urls.add(url)
    sources.append(
        {"title": title, "url": url, "date": _normalize_source_date(date)}
    )


def _collect_article_sources(cypher: str, parameters, sources: list, seen_urls: set) -> None:
    """Best-effort: run *cypher* and add every returned article to *sources*.

    Failures are reported on stdout and otherwise ignored, because the news
    feed is only an optional add-on to the answer.
    """
    try:
        from src.retrieval.finRetrieval import get_neo4j_driver

        driver = get_neo4j_driver()
        with driver.session() as session:
            for record in session.run(cypher, parameters):
                _add_source(
                    sources, seen_urls, record["title"], record["url"], record["date"]
                )
    except Exception as exc:
        # `!a` keeps the message ASCII so the warning itself cannot raise
        # UnicodeEncodeError on a limited console.
        print(f"[WARN] article source lookup failed: {exc!a}")


def retrieve_node(state: ChatState) -> ChatState:
    """Node 1: run `graphrag.search_with_fallback` and build the context text.

    * General mode: the answer is prefixed with a disclaimer and returned
      immediately with ``mode="general"``.
    * Otherwise: up to three source articles are collected, first from the
      retriever result metadata, then by a keyword query on ``Article``
      nodes, and finally from the most recent articles. They are appended to
      the answer as a news feed.

    Args:
        state: Current chat state; ``question`` and ``history`` are read.

    Returns:
        A copy of ``state`` with ``context`` and ``mode`` set. If anything in
        the main path raises, ``context`` holds an error message instead.
    """
    try:
        hybrid: HybridResult = graphrag.search_with_fallback(
            query_text=state["question"],
            history=state["history"],
        )

        if hybrid.mode == "general":
            disclaimer = (
                "> β οΈ **μ§μ κ·Έλνμμ κ΄λ ¨ λ΄μ€λ₯Ό μ°Ύμ§ λͺ»νμ΅λλ€.**\n"
                "> GPT-4oμ μΌλ° νμ΅ λ°μ΄ν°λ₯Ό κΈ°λ°μΌλ‘ λ΅λ³ν©λλ€.\n"
                "> μ΅μ κ΅λ΄ λ΄μ€ κΈ°λ° μ λ³΄κ° νμνλ€λ©΄ μ§λ¬Έμ λ ꡬ체μ μΌλ‘ μλ ₯ν΄ λ³΄μΈμ.\n\n"
                "---\n\n"
            )
            return {**state, "context": disclaimer + hybrid.answer, "mode": "general"}

        context = hybrid.answer
        sources: list = []
        seen_urls: set = set()

        # 1) Sources carried in the retriever result metadata.
        retriever_result = hybrid.retriever_result
        if retriever_result and hasattr(retriever_result, "items"):
            for item in retriever_result.items:
                # `metadata` may exist but be None; treat that like "no metadata".
                meta = getattr(item, "metadata", None) or {}
                _add_source(
                    sources,
                    seen_urls,
                    meta.get("article_title"),
                    meta.get("article_url"),
                    meta.get("article_date"),
                )
                if len(sources) >= 3:
                    break

        # 2) Keyword match on article title/description. The words come from
        #    the user, so they are passed as a query parameter and never
        #    spliced into the Cypher text.
        if not sources:
            words = [w for w in state["question"].split() if len(w) > 1][:3]
            if words:
                _collect_article_sources(
                    "MATCH (a:Article) "
                    "WHERE any(w IN $words WHERE a.title CONTAINS w OR a.description CONTAINS w) "
                    "RETURN a.title as title, a.url as url, a.published_date as date "
                    "ORDER BY a.published_date DESC LIMIT 3",
                    {"words": words},
                    sources,
                    seen_urls,
                )

        # 3) Last resort: the most recently published articles.
        if not sources:
            _collect_article_sources(
                "MATCH (a:Article) RETURN a.title as title, a.url as url, a.published_date as date "
                "ORDER BY a.published_date DESC LIMIT 3",
                None,
                sources,
                seen_urls,
            )

        if sources:
            news_feed = "\n\nπ° **κ΄λ ¨ λ΄μ€ νΌλ (μ€μκ° λΆμ μΆμ²)**\n"
            for s in sources:
                date_str = f" ({s['date']})" if s["date"] else ""
                news_feed += f"- π [{s['title']}]({s['url']}){date_str}\n"

            # Skip the feed when the answer text already contains one.
            if "κ΄λ ¨ λ΄μ€ νΌλ" not in context:
                context += news_feed

    except Exception as e:
        context = f"[κ²μ μ€λ₯: {e}]"
    # `chat` pre-fills mode with "", so fall back on any falsy value, not
    # only on a missing key.
    return {**state, "context": context, "mode": state.get("mode") or "graph"}
|
|
|
|
def generate_node(state: ChatState) -> ChatState:
    """Node 2: turn the retrieved context into the final answer.

    For both graph and general mode, retrieve_node has already placed the
    final text in ``context``, so it is used as is. An empty context is
    replaced by a fixed "nothing found" message.

    Args:
        state: Chat state whose ``context`` has been filled in.

    Returns:
        A copy of ``state`` with ``answer`` set.
    """
    final_text = state["context"] or "κ΄λ ¨ μ 보λ₯Ό μ°Ύμ μ μμ΅λλ€."
    updated = dict(state)
    updated["answer"] = final_text
    return updated
|
|
|
|
| |
| |
| |
|
|
# Wire the two-step pipeline: retrieve -> generate -> END.
builder = StateGraph(ChatState)
builder.add_node("retrieve", retrieve_node)
builder.add_node("generate", generate_node)
builder.set_entry_point("retrieve")
builder.add_edge("retrieve", "generate")
builder.add_edge("generate", END)


# Compiled graph; `chat` streams per-node events from it.
chat_graph = builder.compile()
|
|
|
|
| |
| |
| |
|
|
|
|
def chat(message: str, history: list):
    """Callback invoked by the Gradio ChatInterface.

    Args:
        message: The user's input message.
        history: Conversation history managed by Gradio, in the form
            [{"role": "user"/"assistant", "content": "..."}].

    Yields:
        Status messages while the graph runs, then the final answer. If the
        graph raises, an error message is yielded instead.
    """
    # Blank input: answer with a prompt and stop before touching the graph.
    if not message.strip():
        yield "μ§λ¬Έμ μλ ₯ν΄ μ£ΌμΈμ."
        return

    initial_state: ChatState = {
        "question": message,
        "history": history,
        "context": "",
        "answer": "",
        "mode": "",
    }

    yield "π μ€μκ° μ§μ κ·Έλνμμ κ΄λ ¨ λ΄μ€λ₯Ό κ²μνλ μ€μλλ€..."

    try:
        # Each streamed event is keyed by the node that just finished.
        for event in chat_graph.stream(initial_state):
            if "retrieve" in event:
                took_general_path = event["retrieve"].get("mode", "graph") == "general"
                if took_general_path:
                    yield "π κ΄λ ¨ λ΄μ€ μμ β GPT-4o μΌλ° μ§μμΌλ‘ λ΅λ³μ μμ±νλ μ€μλλ€..."
                else:
                    yield "π‘ κ²μ μλ£! λΆμ κ²°κ³Όλ₯Ό λ°νμΌλ‘ μ΅μ’ λ΅λ³μ μμ±νλ μ€μλλ€..."
                continue
            if "generate" in event:
                yield event["generate"]["answer"]
    except Exception as exc:
        yield f"β οΈ μ±λ΄ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(exc)}"
|
|
|
|
def get_db_stats() -> Dict[str, Any]:
    """Read knowledge-graph statistics and summaries from Neo4j without raising.

    Any failure (import, connection, query) is printed, and whatever has been
    collected so far is returned on top of the defaults, so every key below
    is always present.

    Returns:
        Dict[str, Any]: with keys
            articles / companies / technologies: node counts,
            techs_list: up to 8 ``{"name", "desc"}`` technology entries,
            companies_list: up to 5 ``{"name"}`` entries, most-mentioned first,
            recent_articles: up to 4 ``{"title", "date", "url"}`` entries,
            newest first.
    """
    stats: Dict[str, Any] = {
        "articles": 0,
        "companies": 0,
        "technologies": 0,
        "techs_list": [],
        # Was missing from the defaults, so the key vanished on DB failure.
        "companies_list": [],
        "recent_articles": [],
    }
    try:
        from src.retrieval.finRetrieval import get_neo4j_driver

        driver = get_neo4j_driver()
        with driver.session() as session:
            # Node counts.
            res_articles = session.run("MATCH (a:Article) RETURN count(a) as cnt").single()
            if res_articles:
                stats["articles"] = res_articles["cnt"]

            res_companies = session.run("MATCH (c:AICompany) RETURN count(c) as cnt").single()
            if res_companies:
                stats["companies"] = res_companies["cnt"]

            res_techs = session.run("MATCH (t:AITechnology) RETURN count(t) as cnt").single()
            if res_techs:
                stats["technologies"] = res_techs["cnt"]

            # Technology names with a description (placeholder text when absent).
            res_tech_list = session.run(
                "MATCH (t:AITechnology) "
                "RETURN t.name as name, COALESCE(t.description, 'AI νμ κΈ°μ μΈνλΌ') as desc LIMIT 8"
            )
            stats["techs_list"] = [{"name": r["name"], "desc": r["desc"]} for r in res_tech_list]

            # Companies ordered by how many articles mention them.
            res_comp_list = session.run(
                "MATCH (c:AICompany) "
                "OPTIONAL MATCH (a:Article)-[:MENTIONS]->(c) "
                "RETURN c.name as name, count(a) as cnt "
                "ORDER BY cnt DESC LIMIT 5"
            )
            stats["companies_list"] = [{"name": r["name"]} for r in res_comp_list]

            # Most recently published articles.
            res_art_list = session.run(
                "MATCH (a:Article) "
                "RETURN a.title as title, a.published_date as date, a.url as url "
                "ORDER BY a.published_date DESC LIMIT 4"
            )
            stats["recent_articles"] = [
                {"title": r["title"], "date": r["date"], "url": r["url"]}
                for r in res_art_list
            ]
    except Exception as e:
        print(f"β οΈ [ν΅κ³ μ‘°ν μ€ν¨] Neo4j ν΅κ³λ₯Ό κ°μ Έμ€λ λ° μ€ν¨νμ΅λλ€: {e}")
    return stats
|
|
|
|
| |
| |
| |
|
|
| |
# Major version of the installed Gradio; used further down to decide where
# the theme/CSS options are passed. Falls back to 4 if the version string
# cannot be parsed.
try:
    gradio_major = int(gr.__version__.split(".")[0])
except Exception:
    gradio_major = 4


# Theme object shared by whichever constructor/launch call receives it.
theme_obj = gr.themes.Soft(
    font=["Pretendard", "-apple-system", "BlinkMacSystemFont", "system-ui", "sans-serif"],
    primary_hue="sky",
    secondary_hue="slate",
)
|
|
|
|
# HTML used as the `placeholder` of the gr.Chatbot in `interface_kwargs`.
CHATBOT_DESCRIPTION = """
<div class="prose">
<h3>π AI κΈ°λ° κΈμ΅/νν
ν¬ νμ νΈλ λλ₯Ό λΆμνλ μ§μ κ·Έλν(GraphRAG)μ μ§λ¬ΈνμΈμ.</h3>
<ul>
<li>π° <b>κΈμ΅μ¬/νν
ν¬ AI λν₯</b> β μ νμν, μΉ΄μΉ΄μ€νμ΄, ν μ€λ±
ν¬, λ€μ΄λ²νμ΄ λ±μ μ΅μ κΈμ΅ AI νΈλ λ</li>
<li>π¬ <b>νν
ν¬ ν΅μ¬ κΈ°μ λΆμ</b> β λ‘보μ΄λλ°μ΄μ , λμμ μ©νκ°, AI FDS, κΈμ΅ λ§μ΄λ°μ΄ν° λ± μ 리</li>
<li>π <b>μ€μ λ΄μ€ μΆμ² μ 곡</b> β λ΅λ³λ§λ€ μ€μ 보λλ κ·Όκ±° κΈ°μ¬ λ° μΆμ² URL ν¬ν¨</li>
</ul>
<p>π μλ μμ μ§λ¬Έ λ²νΌμ ν΄λ¦νκ±°λ μ§μ μ
λ ₯ν΄ λ³΄μΈμ.</p>
</div>
"""
|
|
# Keyword arguments for gr.ChatInterface. They are kept in a dict so the
# version switch below can add to them before the interface is built.
interface_kwargs = {
    "fn": chat,
    "chatbot": gr.Chatbot(height=700, placeholder=CHATBOT_DESCRIPTION),
    "textbox": gr.Textbox(
        placeholder="λΆμνκ³ μΆμ λ΄μ©μ μμ°μ΄λ‘ μλ ₯ν΄μ£ΌμΈμ...",
        container=False,
        scale=7,
        submit_btn="μ μ‘",
    ),
    # Example questions offered to the user.
    "examples": [
        "μ νμνμ 'μ ν AI μ ν¬νΈν΄λ¦¬μ€' λ‘보μ΄λλ°μ΄μ κΈ°μ κ³Ό κ°μΈ λ§μΆ€ν μλΉμ€μ νΉμ§μ μ€λͺν΄μ€",
        "μΉ΄μΉ΄μ€νμ΄κ° μ¬νμΌλ¬λ₯Ό μν΄ κ°λ°ν 'AI λμμ μ©νκ°' λͺ¨λΈμ μ₯μ κ³Ό λμΆ μΉμΈ ν¨κ³Όλ 무μμΈκ°μ?",
        "ν μ€λ±ν¬μ μ€μκ° λ³΄μ΄μ€νΌμ± νμ§ κΈ°μ μΈ 'ν μ€ AI FDS'μ μλ μ리μ μ°¨λ¨μ¨μ μλ €μ€",
        "λ€μ΄λ²νμ΄κ° μΆμν 'AI κΈμ΅ λΉμ'κ° λ§μ΄λ°μ΄ν°μ κ²°ν©νμ¬ μ 곡νλ λ§μΆ€ μμ° κ°μ΄λλ μ΄λ€ κ²μΈκ°μ?",
    ],
    "cache_examples": False,
}
|
|
| |
# Arguments for demo.launch(): listen on all interfaces, port 7860.
launch_kwargs = {
    "server_name": "0.0.0.0",
    "server_port": 7860,
}


# Where theme/CSS are passed depends on the Gradio major version:
#   < 5 : theme -> ChatInterface kwargs and Blocks; CSS -> Blocks
#   5   : theme -> launch() and Blocks;             CSS -> Blocks
#   >= 6: theme and CSS -> launch() only
# The "theme" entry added to interface_kwargs is popped again before the
# ChatInterface is constructed further down.
# NOTE(review): confirm that Blocks.launch() accepts `theme` on Gradio 5.x.
blocks_kwargs: Dict[str, Any] = {}
if gradio_major < 5:
    interface_kwargs["theme"] = theme_obj
    blocks_kwargs["theme"] = theme_obj
    blocks_kwargs["css"] = CUSTOM_CSS
elif gradio_major < 6:
    launch_kwargs["theme"] = theme_obj
    blocks_kwargs["theme"] = theme_obj
    blocks_kwargs["css"] = CUSTOM_CSS
else:
    launch_kwargs["theme"] = theme_obj
    launch_kwargs["css"] = CUSTOM_CSS
|
|
| |
# Page layout: header banner on top, then a statistics column next to the chat.
with gr.Blocks(**blocks_kwargs) as demo:
    # Header banner.
    gr.HTML("""
    <div style="display: flex; justify-content: space-between; align-items: center; padding: 10px 20px; border-bottom: 1px solid rgba(196, 195, 236, 0.45); background-color: rgba(255, 255, 255, 0.65); backdrop-filter: blur(12px); margin: -20px -20px 6px -20px;">
    <div style="font-size: 20px; font-weight: 900; color: #0f172a; display: flex; align-items: center; gap: 12px;">
    π FinGraph <span style="font-size: 14px; font-weight: 700; color: #475569;">GraphRAG Enhanced AI Terminal</span>
    </div>
    </div>
    """)

    with gr.Row():
        # Left column: statistics panel. get_db_stats() runs once here, while
        # the UI is being built, so the panel shows a snapshot.
        with gr.Column(scale=3, min_width=320):
            stats_data = get_db_stats()
            stats_html = build_stats_html(stats_data)
            gr.HTML(stats_html)

        # Right column: the chat interface.
        with gr.Column(scale=7, min_width=500, elem_id="chat-column"):
            # Work on a copy and drop keys that must not reach ChatInterface
            # when it is embedded in this Blocks layout.
            chatbot_interface_kwargs: Dict[str, Any] = interface_kwargs.copy()
            chatbot_interface_kwargs.pop("title", None)
            chatbot_interface_kwargs.pop("description", None)
            chatbot_interface_kwargs.pop("theme", None)

            gr.ChatInterface(**chatbot_interface_kwargs)
|
|
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(**launch_kwargs)
|
|