Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See raw diff.
- src/.DS_Store +0 -0
- src/cli/__init__.py +3 -0
- src/cli/approval_ui.py +239 -0
- src/cli/commands.py +49 -0
- src/cli/daemon.py +222 -0
- src/cli/http_client.py +237 -0
- src/cli/renderer.py +257 -0
- src/cli/shell.py +420 -0
- src/cli/terminal.py +56 -0
- src/data_collection_preprocessing/__init__.py +13 -0
- src/data_collection_preprocessing/__main__.py +5 -0
- src/data_collection_preprocessing/cli.py +76 -0
- src/data_collection_preprocessing/config.py +14 -0
- src/data_collection_preprocessing/parsers.py +235 -0
- src/data_collection_preprocessing/pipeline.py +177 -0
- src/inference/.gitkeep +0 -0
- src/inference/__init__.py +0 -0
- src/inference/actions/__init__.py +4 -0
- src/inference/actions/base.py +226 -0
- src/inference/actions/data_go_kr.py +952 -0
- src/inference/agent_loop.py +360 -0
- src/inference/agent_manager.py +134 -0
- src/inference/api_server.py +1899 -0
- src/inference/bm25_indexer.py +446 -0
- src/inference/db/__init__.py +67 -0
- src/inference/db/alembic/env.py +82 -0
- src/inference/db/alembic/script.py.mako +25 -0
- src/inference/db/alembic/versions/001_create_rag_tables.py +346 -0
- src/inference/db/converters.py +162 -0
- src/inference/db/crud.py +306 -0
- src/inference/db/database.py +77 -0
- src/inference/db/models.py +448 -0
- src/inference/document_processor.py +545 -0
- src/inference/feature_flags.py +58 -0
- src/inference/graph/__init__.py +30 -0
- src/inference/graph/builder.py +142 -0
- src/inference/graph/capabilities/__init__.py +35 -0
- src/inference/graph/capabilities/api_lookup.py +268 -0
- src/inference/graph/capabilities/append_evidence.py +133 -0
- src/inference/graph/capabilities/base.py +129 -0
- src/inference/graph/capabilities/defaults.py +94 -0
- src/inference/graph/capabilities/demographics_lookup.py +283 -0
- src/inference/graph/capabilities/draft_civil_response.py +96 -0
- src/inference/graph/capabilities/issue_detector.py +292 -0
- src/inference/graph/capabilities/keyword_analyzer.py +261 -0
- src/inference/graph/capabilities/rag_search.py +287 -0
- src/inference/graph/capabilities/registry.py +125 -0
- src/inference/graph/capabilities/stats_lookup.py +287 -0
- src/inference/graph/executor_adapter.py +190 -0
- src/inference/graph/nodes.py +752 -0
src/.DS_Store
ADDED
Binary file (6.15 kB)
src/cli/__init__.py
ADDED
@@ -0,0 +1,3 @@
+"""GovOn CLI package."""
+
+__version__ = "1.0.1"
src/cli/approval_ui.py
ADDED
@@ -0,0 +1,239 @@
+"""Approval / rejection UI for GovOn CLI.
+
+Renders a direction-key-driven prompt using `prompt_toolkit` when available.
+Falls back to a plain input() prompt if prompt_toolkit is not installed.
+"""
+
+from __future__ import annotations
+
+import unicodedata
+
+from src.cli.terminal import (
+    get_approval_box_width,
+    get_narrow_terminal_warning,
+    get_terminal_columns,
+    is_layout_supported,
+)
+
+_PT_AVAILABLE = False
+try:
+    from prompt_toolkit import Application
+    from prompt_toolkit.formatted_text import HTML
+    from prompt_toolkit.key_binding import KeyBindings
+    from prompt_toolkit.layout import Layout
+    from prompt_toolkit.layout.containers import HSplit, Window
+    from prompt_toolkit.layout.controls import FormattedTextControl
+
+    _PT_AVAILABLE = True
+except ImportError:  # pragma: no cover
+    pass
+
+
+def _display_width(s: str) -> int:
+    """Return the display width of *s*, counting wide (CJK) chars as 2."""
+    w = 0
+    for ch in s:
+        eaw = unicodedata.east_asian_width(ch)
+        w += 2 if eaw in ("W", "F") else 1
+    return w
+
+
+def _box_line(content: str = "", *, width: int) -> str:
+    """Return a single box line padded to *width* display columns."""
+    pad = width - _display_width(content)
+    inner = content + " " * max(pad, 0)
+    return f"│ {inner} │"
+
+
+def _build_box_lines(
+    approval_request: dict, selected: int, box_width: int | None = None
+) -> list[str]:
+    """Build the raw text lines of the approval box (no ANSI needed here)."""
+    goal: str = approval_request.get("goal", "")
+    reason: str = approval_request.get("reason", "")
+    tool_summaries: list[str] = approval_request.get("tool_summaries") or []
+
+    w = get_approval_box_width(get_terminal_columns()) if box_width is None else box_width
+    _header = "─ ์์์น์ธ ์์ฒญ "
+    top = "┌" + _header + "─" * max(w - _display_width(_header) + 2, 0) + "┐"
+    bot = "└" + "─" * (w + 2) + "┘"
+
+    lines: list[str] = [top, _box_line(width=w)]
+
+    def _wrap(label: str, value: str) -> None:
+        prefix = f" {label}: "
+        available = max(w - _display_width(prefix), 1)
+        if _display_width(value) <= available:
+            lines.append(_box_line(f"{prefix}{value}", width=w))
+        else:
+            # Truncate value to fit within available display columns
+            chunk: list[str] = []
+            used = 0
+            for ch in value:
+                cw = 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
+                if used + cw > available:
+                    break
+                chunk.append(ch)
+                used += cw
+            first = "".join(chunk)
+            lines.append(_box_line(f"{prefix}{first}", width=w))
+            rest = value[len(first) :]
+            while rest:
+                row: list[str] = []
+                used = 0
+                col_limit = w - 4
+                for ch in rest:
+                    cw = 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
+                    if used + cw > col_limit:
+                        break
+                    row.append(ch)
+                    used += cw
+                seg = "".join(row)
+                lines.append(_box_line(f" {seg}", width=w))
+                rest = rest[len(seg) :]
+
+    _wrap("๋ชฉํ", goal)
+    _wrap("์ด์ ", reason)
+
+    if tool_summaries:
+        lines.append(_box_line(width=w))
+        lines.append(_box_line(" ์ํํ ์์:", width=w))
+        for idx, summary in enumerate(tool_summaries, 1):
+            prefix = f" {idx}. "
+            avail = max(w - _display_width(prefix), 1)
+            if _display_width(summary) <= avail:
+                lines.append(_box_line(f"{prefix}{summary}", width=w))
+            else:
+                chunk2: list[str] = []
+                used2 = 0
+                for ch in summary:
+                    cw = 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
+                    if used2 + cw > avail:
+                        break
+                    chunk2.append(ch)
+                    used2 += cw
+                first2 = "".join(chunk2)
+                lines.append(_box_line(f"{prefix}{first2}", width=w))
+                rest2 = summary[len(first2) :]
+                while rest2:
+                    row2: list[str] = []
+                    used2 = 0
+                    col_limit2 = max(w - 7, 1)
+                    for ch in rest2:
+                        cw = 2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
+                        if used2 + cw > col_limit2:
+                            break
+                        row2.append(ch)
+                        used2 += cw
+                    seg2 = "".join(row2)
+                    lines.append(_box_line(f" {seg2}", width=w))
+                    rest2 = rest2[len(seg2) :]
+
+    lines.append(_box_line(width=w))
+    approve_bullet = "●" if selected == 0 else "○"
+    reject_bullet = "●" if selected == 1 else "○"
+    lines.append(_box_line(f" {approve_bullet} ์น์ธ", width=w))
+    lines.append(_box_line(f" {reject_bullet} ๊ฑฐ์ ", width=w))
+    lines.append(bot)
+    return lines
+
+
+def show_approval_prompt(approval_request: dict) -> bool:
+    """Show an interactive approval / rejection prompt.
+
+    Returns True if approved, False if rejected.
+    """
+    terminal_columns = get_terminal_columns()
+    if not is_layout_supported(terminal_columns):
+        print(get_narrow_terminal_warning(terminal_columns))
+        return _fallback_prompt(approval_request, columns=terminal_columns)
+
+    if not _PT_AVAILABLE:
+        return _fallback_prompt(approval_request, columns=terminal_columns)
+
+    return _pt_prompt(approval_request, columns=terminal_columns)
+
+
+def _pt_prompt(approval_request: dict, *, columns: int) -> bool:
+    """prompt_toolkit-based arrow-key selection UI."""
+    state = {"selected": 0, "result": None}
+    box_width = get_approval_box_width(columns)
+
+    def get_text():
+        # Keep a stable width for a single prompt interaction.
+        lines = _build_box_lines(approval_request, state["selected"], box_width=box_width)
+        return "\n".join(lines) + "\n\n↑↓ ๋ฐฉํฅํค๋ก ์ ํ, Enter๋ก ํ์ "
+
+    kb = KeyBindings()
+
+    @kb.add("up")
+    @kb.add("k")
+    def _up(event):
+        state["selected"] = (state["selected"] - 1) % 2
+        _refresh_control()
+
+    @kb.add("down")
+    @kb.add("j")
+    def _down(event):
+        state["selected"] = (state["selected"] + 1) % 2
+        _refresh_control()
+
+    @kb.add("enter")
+    def _confirm(event):
+        state["result"] = state["selected"] == 0
+        event.app.exit()
+
+    @kb.add("q")
+    @kb.add("c-c")
+    def _cancel(event):
+        state["result"] = False
+        event.app.exit()
+
+    control = FormattedTextControl(text=get_text)
+    window = Window(content=control)
+    layout = Layout(HSplit([window]))
+
+    def _refresh_control():
+        control.text = get_text  # keep as callable
+        app.invalidate()
+
+    app: Application = Application(layout=layout, key_bindings=kb, full_screen=False)
+    app.run()
+
+    return bool(state["result"])
+
+
+def _fallback_prompt(approval_request: dict, columns: int | None = None) -> bool:
+    """Plain input() fallback when prompt_toolkit is unavailable."""
+    goal: str = approval_request.get("goal", "")
+    reason: str = approval_request.get("reason", "")
+    tool_summaries: list[str] = approval_request.get("tool_summaries") or []
+    terminal_columns = get_terminal_columns() if columns is None else columns
+    separator = "─" * max(terminal_columns - 2, 12)
+    title = " ์์์น์ธ ์์ฒญ "
+    title_width = _display_width(title)
+    if terminal_columns > title_width:
+        fill_width = terminal_columns - title_width
+        left_fill = fill_width // 2
+        right_fill = fill_width - left_fill
+        title_line = f"{'─' * left_fill}{title}{'─' * right_fill}"
+    else:
+        title_line = title
+
+    print(f"\n{title_line}")
+    if goal:
+        print(f" ๋ชฉํ: {goal}")
+    if reason:
+        print(f" ์ด์ : {reason}")
+    if tool_summaries:
+        print("\n ์ํํ ์์:")
+        for idx, s in enumerate(tool_summaries, 1):
+            print(f" {idx}. {s}")
+    print(separator)
+
+    try:
+        answer = input("์น์ธํ์๊ฒ ์ต๋๊น? (y/n): ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        return False
+
+    return answer in ("y", "yes", "์", "๋ค")
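
Usage sketch for the prompt above: a minimal, illustrative call. The goal / reason / tool_summaries keys mirror the approval_request.get(...) calls in the diff; the sample values are invented.

from src.cli.approval_ui import show_approval_prompt

# Hypothetical payload; keys match the diff, values are illustrative only.
request = {
    "goal": "refresh the local dataset",
    "reason": "user asked for updated statistics",
    "tool_summaries": ["call data.go.kr catalog API", "rebuild BM25 index"],
}
approved = show_approval_prompt(request)  # True on approve, False on reject/cancel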
src/cli/commands.py
ADDED
@@ -0,0 +1,49 @@
+"""Slash command parser and handler for GovOn CLI."""
+
+COMMANDS: dict[str, str] = {
+    "/help": "์ฌ์ฉ ๊ฐ๋ฅํ ๋ช๋ น๊ณผ ๋์๋ง์ ํ์ํฉ๋๋ค.",
+    "/clear": "ํฐ๋ฏธ๋ ํ๋ฉด์ ์ด๊ธฐํํฉ๋๋ค.",
+    "/exit": "์ธ์ ์ข๋ฃํฉ๋๋ค.",
+}
+
+_HELP_TEXT = """GovOn CLI ์ฌ์ฉ๋ฒ
+────────────────────────────────────────
+  govon                        ์ธํฐ๋ํฐ๋ธ REPL ๋ชจ๋
+  govon "์ง๋ฌธ"                 ๋จ๋ฐ ์คํ ๋ชจ๋
+  govon --session <id>         ๊ธฐ์กด ์ธ์์ฌ๊ฐ
+  govon --session <id> "์ง๋ฌธ"  ๊ธฐ์กด ์ธ์์์ ๋จ๋ฐ ์คํ
+  govon --status               daemon ์ํ ํ์ธ
+  govon --stop                 daemon ์ค์ง
+
+์ฌ๋์ ๋ช๋ น
+────────────────────────────────────────"""
+
+for _cmd, _desc in COMMANDS.items():
+    _HELP_TEXT += f"\n  {_cmd:<10} {_desc}"
+
+_HELP_TEXT += "\n────────────────────────────────────────\n์๋ฌด ์์ฒญ์ ์์ฐ์ด๋ก ์ง์ ์๋ ฅํ์ธ์."
+
+
+def is_command(text: str) -> bool:
+    """Return True if text is a slash command."""
+    return text.strip().startswith("/")
+
+
+def handle_command(text: str) -> str | None:
+    """Execute a slash command and return a result string, or None.
+
+    Raises SystemExit for /exit.
+    """
+    cmd = text.strip().split()[0].lower()
+
+    if cmd == "/help":
+        return _HELP_TEXT
+
+    if cmd == "/clear":
+        print("\033[2J\033[H", end="", flush=True)
+        return None
+
+    if cmd == "/exit":
+        raise SystemExit(0)
+
+    return f"์ ์ ์๋ ๋ช๋ น์๋๋ค: {cmd}\n/help๋ฅผ ์๋ ฅํ์ธ์."
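
A sketch of the dispatch contract above (the /help return value, None for /clear, and SystemExit on /exit come from the code; the sample input and loop are illustrative):

from src.cli.commands import handle_command, is_command

line = "/help"  # hypothetical user input
if is_command(line):
    try:
        output = handle_command(line)  # help text, None for /clear, hint for unknown
        if output:
            print(output)
    except SystemExit:
        pass  # /exit ends the REPL loop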
src/cli/daemon.py
ADDED
@@ -0,0 +1,222 @@
+"""GovOn daemon lifecycle ๊ด๋ฆฌ.
+
+Issue #144: CLI-daemon/LangGraph runtime ์ฐ๋ ๋ฐ session resume.
+
+uvicorn์ผ๋ก ๋ฐฑ๊ทธ๋ผ์ด๋์์ GovOn API ์๋ฒ๋ฅผ ๊ธฐ๋ํ๊ณ ,
+PID ํ์ผ๋ก ํ๋ก์ธ์ค ์ํ๋ฅผ ์ถ์ ํ๋ค.
+
+.. note::
+    ์ด ๋ชจ๋์ **๋ก์ปฌ daemon ์ ์ฉ**์๋๋ค.
+    ์๊ฒฉ ์๋ฒ์ ์ฐ๊ฒฐํ ๋๋ ``GOVON_RUNTIME_URL`` ํ๊ฒฝ๋ณ์๋ฅผ ์ค์ ํ๋ฉด
+    ``shell.py``์ ``main()``์ด ์ด ๋ชจ๋์ ์์ ํ ๊ฑด๋๋ฐ๊ณ ์ง์ ๋ URL์
+    ์ง์ ์ฐ๊ฒฐํฉ๋๋ค. Docker, ํด๋ผ์ฐ๋ ๋ฐฐํฌ, CI ํ๊ฒฝ์์๋ ํด๋น ๋ฐฉ์์
+    ์ฌ์ฉํ๋ ๊ฒ์ ๊ถ์ฅํฉ๋๋ค.
+"""
+
+from __future__ import annotations
+
+import os
+import signal
+import subprocess
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+
+import httpx
+from loguru import logger
+
+
+class DaemonManager:
+    """GovOn API ์๋ฒ daemon lifecycle ๊ด๋ฆฌ์.
+
+    PID ํ์ผ๊ณผ /health ์๋ํฌ์ธํธ๋ฅผ ๊ฒฐํฉํ์ฌ daemon ์ํ๋ฅผ ํ์ธํ๊ณ ,
+    ํ์ ์ uvicorn์ผ๋ก ๋ฐฑ๊ทธ๋ผ์ด๋ ๊ธฐ๋ํ๋ค.
+
+    ํ๊ฒฝ๋ณ์ ``GOVON_PORT``๋ก ํฌํธ๋ฅผ ์ค๋ฒ๋ผ์ด๋ํ ์ ์๋ค (๊ธฐ๋ณธ: 8000).
+    """
+
+    GOVON_HOME = Path.home() / ".govon"
+    _HEALTH_CHECK_TIMEOUT = 120  # ์ต๋ ๋๊ธฐ ์ด
+    _HEALTH_CHECK_INTERVAL = 1  # ์ฌ์๋ ๊ฐ๊ฒฉ (์ด)
+
+    def __init__(self) -> None:
+        self.GOVON_HOME.mkdir(parents=True, exist_ok=True)
+        self.port: int = int(os.environ.get("GOVON_PORT", "8000"))
+        self.pid_path: Path = self.GOVON_HOME / "daemon.pid"
+        self.log_path: Path = self.GOVON_HOME / "daemon.log"
+
+    def get_base_url(self) -> str:
+        """daemon base URL์ ๋ฐํํ๋ค."""
+        return f"http://127.0.0.1:{self.port}"
+
+    def is_running(self) -> bool:
+        """daemon์ด ์คํ ์ค์ธ์ง ํ์ธํ๋ค.
+
+        PID ํ์ผ์ด ์กด์ฌํ๊ณ ํด๋น ํ๋ก์ธ์ค๊ฐ ์ด์ ์์ผ๋ฉฐ,
+        /health ์๋ํฌ์ธํธ๊ฐ ์๋ตํ ๋ True๋ฅผ ๋ฐํํ๋ค.
+        """
+        pid = self._read_pid()
+        if pid is None:
+            return False
+
+        # PID ํ๋ก์ธ์ค ์์กด ํ์ธ
+        if not self._pid_alive(pid):
+            logger.debug(f"[daemon] PID {pid} ํ๋ก์ธ์ค๊ฐ ์์. PID ํ์ผ ์ ๊ฑฐ.")
+            self._remove_pid()
+            return False
+
+        # /health HTTP ํ์ธ
+        try:
+            with httpx.Client(timeout=5.0) as client:
+                resp = client.get(f"{self.get_base_url()}/health")
+                return resp.status_code == 200
+        except (httpx.ConnectError, httpx.TimeoutException, Exception):
+            return False
+
+    def start(self) -> bool:
+        """uvicorn์ ๋ฐฑ๊ทธ๋ผ์ด๋๋ก ๊ธฐ๋ํ๊ณ PID๋ฅผ ๊ธฐ๋กํ๋ค.
+
+        Returns
+        -------
+        bool
+            ๊ธฐ๋ ์ฑ๊ณต ์ฌ๋ถ (health check ํต๊ณผ ์ True).
+        """
+        # ๋ ์ด์ค ์ปจ๋์๋ฐฉ์ง: ๊ธฐ๋ ์ ํ ๋ฒ ๋ health check
+        if self.is_running():
+            logger.info("[daemon] ์ด๋ฏธ ์คํ ์ค์๋๋ค.")
+            return True
+
+        cmd = [
+            sys.executable,
+            "-m",
+            "uvicorn",
+            "src.inference.api_server:app",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            str(self.port),
+        ]
+
+        logger.info(f"[daemon] ๊ธฐ๋ ๋ช๋ น: {' '.join(cmd)}")
+
+        with open(self.log_path, "a") as log_file:
+            proc = subprocess.Popen(
+                cmd,
+                stdout=log_file,
+                stderr=log_file,
+                start_new_session=True,
+            )
+
+        self._write_pid(proc.pid)
+        logger.info(f"[daemon] ํ๋ก์ธ์ค ๊ธฐ๋ ์๋ฃ. PID={proc.pid}")
+
+        # health check ๋๊ธฐ
+        return self._wait_until_healthy()
+
+    def stop(self) -> None:
+        """daemon์ ์ ์ ์ข๋ฃํ๋ค (SIGTERM → timeout ํ SIGKILL)."""
+        pid = self._read_pid()
+        if pid is None:
+            logger.info("[daemon] PID ํ์ผ์ด ์์ต๋๋ค. ์คํ ์ค์ด ์๋ ๊ฒ์ผ๋ก ๊ฐ์ฃผํฉ๋๋ค.")
+            return
+
+        if not self._pid_alive(pid):
+            logger.info(f"[daemon] PID {pid} ํ๋ก์ธ์ค๊ฐ ์์ต๋๋ค.")
+            self._remove_pid()
+            return
+
+        logger.info(f"[daemon] SIGTERM ์ ์ก: PID={pid}")
+        os.kill(pid, signal.SIGTERM)
+
+        # ์ต๋ 10์ด ๋๊ธฐ
+        for _ in range(10):
+            time.sleep(1)
+            if not self._pid_alive(pid):
+                logger.info(f"[daemon] PID {pid} ์ ์ ์ข๋ฃ๋จ.")
+                self._remove_pid()
+                return
+
+        logger.warning(f"[daemon] SIGKILL ์ ์ก: PID={pid}")
+        try:
+            os.kill(pid, signal.SIGKILL)
+        except ProcessLookupError:
+            pass
+        self._remove_pid()
+
+    def ensure_running(self) -> str:
+        """daemon์ด ์คํ ์ค์์ ๋ณด์ฅํ๊ณ base URL์ ๋ฐํํ๋ค.
+
+        ์คํ ์ค์ด ์๋๋ฉด start()๋ฅผ ํธ์ถํ๋ค.
+
+        Returns
+        -------
+        str
+            daemon base URL (์: "http://127.0.0.1:8000").
+
+        Raises
+        ------
+        RuntimeError
+            daemon ๊ธฐ๋์ ์คํจํ ๊ฒฝ์ฐ.
+        """
+        if not self.is_running():
+            success = self.start()
+            if not success:
+                raise RuntimeError(
+                    "GovOn daemon ๊ธฐ๋์ ์คํจํ์ต๋๋ค. " f"๋ก๊ทธ๋ฅผ ํ์ธํ์ธ์: {self.log_path}"
+                )
+        return self.get_base_url()
+
+    # ------------------------------------------------------------------
+    # ๋ด๋ถ ํฌํผ
+    # ------------------------------------------------------------------
+
+    def _read_pid(self) -> Optional[int]:
+        """PID ํ์ผ์์ PID๋ฅผ ์ฝ๋๋ค. ํ์ผ์ด ์์ผ๋ฉด None."""
+        if not self.pid_path.exists():
+            return None
+        try:
+            first_line = self.pid_path.read_text().strip().splitlines()[0]
+            return int(first_line.split()[0])
+        except (ValueError, OSError, IndexError):
+            return None
+
+    def _write_pid(self, pid: int) -> None:
+        """PID์ ๊ธฐ๋ ์๊ฐ(epoch timestamp)์ ํ์ผ์ ๊ธฐ๋กํ๋ค."""
+        self.pid_path.write_text(f"{pid} {int(time.time())}")
+
+    def _remove_pid(self) -> None:
+        """PID ํ์ผ์ ์ ๊ฑฐํ๋ค."""
+        try:
+            self.pid_path.unlink()
+        except FileNotFoundError:
+            pass
+
+    @staticmethod
+    def _pid_alive(pid: int) -> bool:
+        """ํ๋ก์ธ์ค๊ฐ ์ด์ ์๋์ง ํ์ธํ๋ค."""
+        try:
+            os.kill(pid, 0)
+            return True
+        except ProcessLookupError:
+            return False
+        except PermissionError:
+            # ํ๋ก์ธ์ค๊ฐ ์กด์ฌํ์ง๋ง ๊ถํ์ด ์๋ ๊ฒฝ์ฐ → ์ด์ ์์์ผ๋ก ๊ฐ์ฃผ
+            return True
+
+    def _wait_until_healthy(self) -> bool:
+        """health check๊ฐ ํต๊ณผํ ๋๊น์ง ์ต๋ 120์ด ๋๊ธฐํ๋ค."""
+        deadline = time.monotonic() + self._HEALTH_CHECK_TIMEOUT
+        while time.monotonic() < deadline:
+            try:
+                with httpx.Client(timeout=3.0) as client:
+                    resp = client.get(f"{self.get_base_url()}/health")
+                    if resp.status_code == 200:
+                        logger.info("[daemon] health check ํต๊ณผ.")
+                        return True
+            except (httpx.ConnectError, httpx.TimeoutException, Exception):
+                pass
+            time.sleep(self._HEALTH_CHECK_INTERVAL)
+
+        logger.error("[daemon] health check timeout (120์ด).")
+        return False
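
Lifecycle sketch for the manager above. ensure_running(), stop(), the GOVON_PORT default, the RuntimeError on failed startup, and the ~/.govon/daemon.log path are all from the diff; the wrapper script itself is illustrative.

from src.cli.daemon import DaemonManager

manager = DaemonManager()  # honors GOVON_PORT, defaults to 8000
try:
    base_url = manager.ensure_running()  # spawns uvicorn if needed, waits on /health
    print(f"daemon ready at {base_url}")
except RuntimeError as exc:
    print(exc)  # startup failed; details land in ~/.govon/daemon.log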
src/cli/http_client.py
ADDED
@@ -0,0 +1,237 @@
+"""GovOn ๋ก์ปฌ daemon API HTTP ํด๋ผ์ด์ธํธ.
+
+Issue #144: CLI-daemon/LangGraph runtime ์ฐ๋ ๋ฐ session resume.
+Issue #140: CLI ์น์ธ UI ๋ฐ ์ต์ ๋ช๋ น ์ฒด๊ณ (๋ฐฑ์๋ ๋ถ๋ถ).
+
+๋ก์ปฌ daemon(uvicorn)์ REST API๋ฅผ ๋ํํ๋ ํด๋ผ์ด์ธํธ.
+run / approve / cancel ๋ฑ ํต์ฌ ์๋ํฌ์ธํธ์ ์ ๊ทผํ๋ค.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, Generator, Iterator, Optional
+
+import httpx
+from loguru import logger
+
+
+class GovOnClient:
+    """GovOn ๋ก์ปฌ daemon HTTP ํด๋ผ์ด์ธํธ.
+
+    Parameters
+    ----------
+    base_url : str
+        daemon base URL (์: "http://127.0.0.1:8000").
+    """
+
+    _RUN_TIMEOUT = 120.0
+    _DEFAULT_TIMEOUT = 30.0
+
+    def __init__(self, base_url: str) -> None:
+        self._base_url = base_url.rstrip("/")
+
+    # ------------------------------------------------------------------
+    # ๊ณต๊ฐ API
+    # ------------------------------------------------------------------
+
+    def health(self) -> Dict[str, Any]:
+        """GET /health → daemon ์ํ๋ฅผ ํ์ธํ๋ค.
+
+        Returns
+        -------
+        dict
+            ์๋ฒ๊ฐ ๋ฐํํ๋ health ์๋ต.
+
+        Raises
+        ------
+        ConnectionError
+            daemon์ ์ฐ๊ฒฐํ ์ ์์ ๋.
+        """
+        return self._get("/health", timeout=self._DEFAULT_TIMEOUT)
+
+    def run(
+        self,
+        query: str,
+        session_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """POST /v2/agent/run → ์์ด์ ํธ ์คํ ์์ฒญ.
+
+        Parameters
+        ----------
+        query : str
+            ์ฌ์ฉ์ ์๋ ฅ ์ฟผ๋ฆฌ.
+        session_id : str | None
+            ๊ธฐ์กด ์ธ์์ ์ด์ด๋ฐ์ ๊ฒฝ์ฐ session ID.
+
+        Returns
+        -------
+        dict
+            ์๋ฒ ์๋ต (thread_id, status ๋ฑ ํฌํจ).
+        """
+        body: Dict[str, Any] = {"query": query}
+        if session_id is not None:
+            body["session_id"] = session_id
+
+        logger.debug(f"[http_client] run: session_id={session_id} query_len={len(query)}")
+        return self._post("/v2/agent/run", body=body, timeout=self._RUN_TIMEOUT)
+
+    def approve(self, thread_id: str, approved: bool) -> Dict[str, Any]:
+        """POST /v2/agent/approve → ์น์ธ ๋๋ ๊ฑฐ์ .
+
+        Parameters
+        ----------
+        thread_id : str
+            ์น์ธ/๊ฑฐ์ ํ graph thread ID.
+        approved : bool
+            True์ด๋ฉด ์น์ธ, False์ด๋ฉด ๊ฑฐ์ .
+
+        Returns
+        -------
+        dict
+            ์๋ฒ ์๋ต.
+        """
+        logger.debug(f"[http_client] approve: thread_id={thread_id} approved={approved}")
+        return self._post_params(
+            "/v2/agent/approve",
+            params={"thread_id": thread_id, "approved": str(approved).lower()},
+            timeout=self._DEFAULT_TIMEOUT,
+        )
+
+    def stream(
+        self,
+        query: str,
+        session_id: Optional[str] = None,
+    ) -> Generator[Dict[str, Any], None, None]:
+        """POST /v2/agent/stream → SSE ์คํธ๋ฆฌ๋ฐ์ผ๋ก ๋ธ๋๋ณ ์ด๋ฒคํธ๋ฅผ ์์ ํ๋ค.
+
+        Parameters
+        ----------
+        query : str
+            ์ฌ์ฉ์ ์๋ ฅ ์ฟผ๋ฆฌ.
+        session_id : str | None
+            ๊ธฐ์กด ์ธ์์ ์ด์ด๋ฐ์ ๊ฒฝ์ฐ session ID.
+
+        Yields
+        ------
+        dict
+            ํ์ฑ๋ SSE ์ด๋ฒคํธ dict. ์ต์ ``node``์ ``status`` ํค๋ฅผ ํฌํจํ๋ค.
+
+        Raises
+        ------
+        ConnectionError
+            daemon์ ์ฐ๊ฒฐํ ์ ์์ ๋.
+        httpx.HTTPStatusError
+            HTTP ์ค๋ฅ ์๋ต ์.
+        """
+        body: Dict[str, Any] = {"query": query}
+        if session_id is not None:
+            body["session_id"] = session_id
+
+        url = f"{self._base_url}/v2/agent/stream"
+        logger.debug(f"[http_client] stream: session_id={session_id} query_len={len(query)}")
+
+        try:
+            timeout = httpx.Timeout(connect=10.0, read=300.0, write=10.0, pool=10.0)
+            with httpx.Client(timeout=timeout) as client:
+                with client.stream("POST", url, json=body) as resp:
+                    resp.raise_for_status()
+                    for line in resp.iter_lines():
+                        line = line.strip()
+                        if not line:
+                            continue
+                        if line.startswith("data:"):
+                            data_str = line[len("data:") :].strip()
+                            if not data_str:
+                                continue
+                            try:
+                                event = json.loads(data_str)
+                                yield event
+                            except json.JSONDecodeError:
+                                logger.warning(f"[http_client] SSE JSON ํ์ฑ ์คํจ: {data_str!r}")
+                                continue
+        except httpx.ConnectError as exc:
+            raise ConnectionError(f"daemon์ด ์คํ ์ค์ด ์๋๋๋ค. ({self._base_url})") from exc
+        except httpx.HTTPStatusError as exc:
+            logger.error(f"[http_client] HTTP {exc.response.status_code}: {url}")
+            raise
+
+    def cancel(self, thread_id: str) -> Dict[str, Any]:
+        """POST /v2/agent/cancel → ์คํ ์ค์ธ ์ธ์์ทจ์.
+
+        Parameters
+        ----------
+        thread_id : str
+            ์ทจ์ํ graph thread ID.
+
+        Returns
+        -------
+        dict
+            ์๋ฒ ์๋ต.
+        """
+        logger.debug(f"[http_client] cancel: thread_id={thread_id}")
+        return self._post_params(
+            "/v2/agent/cancel",
+            params={"thread_id": thread_id},
+            timeout=self._DEFAULT_TIMEOUT,
+        )
+
+    # ------------------------------------------------------------------
+    # ๋ด๋ถ ํฌํผ
+    # ------------------------------------------------------------------
+
+    def _get(self, path: str, *, timeout: float) -> Dict[str, Any]:
+        url = f"{self._base_url}{path}"
+        try:
+            with httpx.Client(timeout=timeout) as client:
+                resp = client.get(url)
+                resp.raise_for_status()
+                return resp.json()
+        except httpx.ConnectError as exc:
+            raise ConnectionError(f"daemon์ด ์คํ ์ค์ด ์๋๋๋ค. ({self._base_url})") from exc
+        except httpx.HTTPStatusError as exc:
+            logger.error(f"[http_client] HTTP {exc.response.status_code}: {url}")
+            raise
+
+    def _post(
+        self,
+        path: str,
+        *,
+        body: Dict[str, Any],
+        timeout: float,
+    ) -> Dict[str, Any]:
+        url = f"{self._base_url}{path}"
+        try:
+            with httpx.Client(timeout=timeout) as client:
+                resp = client.post(url, json=body)
+                resp.raise_for_status()
+                return resp.json()
+        except httpx.ConnectError as exc:
+            raise ConnectionError(f"daemon์ด ์คํ ์ค์ด ์๋๋๋ค. ({self._base_url})") from exc
+        except httpx.HTTPStatusError as exc:
+            logger.error(f"[http_client] HTTP {exc.response.status_code}: {url}")
+            raise
+
+    def _post_params(
+        self,
+        path: str,
+        *,
+        params: Dict[str, Any],
+        timeout: float,
+    ) -> Dict[str, Any]:
+        """์ฟผ๋ฆฌ ํ๋ผ๋ฏธํฐ๋ฅผ ์ฌ์ฉํ๋ POST ์์ฒญ ํฌํผ.
+
+        `/v2/agent/approve`, `/v2/agent/cancel` ๋ฑ FastAPI ์๋ํฌ์ธํธ๊ฐ
+        ์ฟผ๋ฆฌ ํ๋ผ๋ฏธํฐ๋ฅผ ๊ธฐ๋ํ ๋ ์ฌ์ฉํ๋ค.
+        """
+        url = f"{self._base_url}{path}"
+        try:
+            with httpx.Client(timeout=timeout) as client:
+                resp = client.post(url, params=params)
+                resp.raise_for_status()
+                return resp.json()
+        except httpx.ConnectError as exc:
+            raise ConnectionError(f"daemon์ด ์คํ ์ค์ด ์๋๋๋ค. ({self._base_url})") from exc
+        except httpx.HTTPStatusError as exc:
+            logger.error(f"[http_client] HTTP {exc.response.status_code}: {url}")
+            raise
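
A sketch of consuming the SSE stream above. The /v2/agent/stream events with node / status / thread_id keys and the approve() signature are from the diff; the query string and the auto-approve decision are illustrative.

from src.cli.http_client import GovOnClient

client = GovOnClient("http://127.0.0.1:8000")
for event in client.stream("sample query"):
    if event.get("status") == "awaiting_approval":
        # hand off to the approval UI, then resume the paused graph thread
        client.approve(event.get("thread_id", ""), approved=True)
        break
    print(event.get("node"), event.get("status"))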
src/cli/renderer.py
ADDED
@@ -0,0 +1,257 @@
+"""Result rendering for GovOn CLI.
+
+Uses `rich` when available; falls back to plain print() otherwise.
+"""
+
+from __future__ import annotations
+
+from threading import Lock
+
+from src.cli.terminal import (
+    get_narrow_terminal_warning,
+    get_panel_width,
+    get_terminal_columns,
+    is_layout_supported,
+)
+
+try:
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.status import Status
+    from rich.text import Text
+
+    _console = Console()
+    _RICH_AVAILABLE = True
+except ImportError:  # pragma: no cover
+    _console = None  # type: ignore[assignment]
+    _RICH_AVAILABLE = False
+
+_HAS_WARNED_NARROW_TERMINAL = False
+_NARROW_WARNING_LOCK = Lock()
+
+# ---------------------------------------------------------------------------
+# Node status message mapping
+# ---------------------------------------------------------------------------
+
+NODE_STATUS_MESSAGES: dict[str, str] = {
+    "session_load": "์ธ์๋ก๋ ์ค…",
+    "planner": "๊ณํ ์๋ฆฝ ์ค…",
+    "approval_wait": "์น์ธ ๋๊ธฐ ์ค…",
+    "tool_execute": "๋๊ตฌ ์คํ ์ค…",
+    "synthesis": "๋ต๋ณ ์์ฑ ์ค…",
+    "persist": "์ ์ฅ ์ค…",
+}
+
+
+def get_node_message(node_name: str) -> str:
+    """Return a human-readable status message for a given node name."""
+    return NODE_STATUS_MESSAGES.get(node_name, f"{node_name} ์ฒ๋ฆฌ ์ค…")
+
+
+# ---------------------------------------------------------------------------
+# Spinner context manager
+# ---------------------------------------------------------------------------
+
+
+class StreamingStatusDisplay:
+    """Context manager that shows a spinner and updates the message per node.
+
+    Wraps rich.status.Status when rich is available; falls back to plain print().
+    """
+
+    def __init__(self, initial_message: str = "์ฒ๋ฆฌ ์ค…") -> None:
+        self._initial_message = initial_message
+        self._status: Status | None = None  # type: ignore[name-defined]
+        self._use_rich = False
+
+    def __enter__(self) -> "StreamingStatusDisplay":
+        self._use_rich, _ = _resolve_render_mode()
+        if self._use_rich:
+            self._status = _console.status(self._initial_message, spinner="dots")
+            self._status.__enter__()
+        else:
+            print(f"โ {self._initial_message}", flush=True)
+        return self
+
+    def update(self, message: str) -> None:
+        """Update the displayed status message."""
+        if self._use_rich and self._status is not None:
+            self._status.update(message)
+        else:
+            print(f"โ {message}", flush=True)
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        if self._use_rich and self._status is not None:
+            self._status.__exit__(exc_type, exc_val, exc_tb)
+        self._status = None
+
+
+def _warn_narrow_terminal_once(columns: int) -> None:
+    """Emit the narrow-terminal fallback warning once per narrow-state entry."""
+    global _HAS_WARNED_NARROW_TERMINAL
+
+    with _NARROW_WARNING_LOCK:
+        if _HAS_WARNED_NARROW_TERMINAL:
+            return
+        _HAS_WARNED_NARROW_TERMINAL = True
+
+    print(get_narrow_terminal_warning(columns), flush=True)
+
+
+def _reset_narrow_warning() -> None:
+    """Reset narrow-terminal warning state for tests and wide-terminal recovery."""
+    global _HAS_WARNED_NARROW_TERMINAL
+
+    with _NARROW_WARNING_LOCK:
+        _HAS_WARNED_NARROW_TERMINAL = False
+
+
+def _resolve_render_mode() -> tuple[bool, int]:
+    """Return (use_rich, terminal_columns) for the current render call."""
+    columns = get_terminal_columns()
+    if not is_layout_supported(columns):
+        _warn_narrow_terminal_once(columns)
+        return False, columns
+    _reset_narrow_warning()
+    return _RICH_AVAILABLE, columns
+
+
+def _plain_rule(columns: int) -> str:
+    """Return a separator that fits within the current terminal."""
+    return "─" * max(columns - 2, 12)
+
+
+def render_evidence_section(evidence_items: list) -> str:
+    """EvidenceItem dict ๋ฆฌ์คํธ๋ฅผ ์ถ์ฒ ์น์ํ์คํธ๋ก ๋ณํํ๋ค.
+
+    source_type๋ณ๋ก ๊ทธ๋ฃนํํ์ฌ ํ์ํ๋ค:
+    [๋ก์ปฌ ๋ฌธ์] → rag ์ถ์ฒ (file_path, page, score ํฌํจ)
+    [์ธ๋ถ API] → api ์ถ์ฒ (URL ํฌํจ)
+    [LLM ์์ฑ] → llm_generated ์ถ์ฒ
+
+    Parameters
+    ----------
+    evidence_items : list
+        EvidenceItem.to_dict() ํํ์ dict ๋ฆฌ์คํธ.
+
+    Returns
+    -------
+    str
+        ์ถ์ฒ ์น์ํ์คํธ. items๊ฐ ์์ผ๋ฉด ๋น ๋ฌธ์์ด.
+    """
+    if not evidence_items:
+        return ""
+
+    # source_type๋ณ ๊ทธ๋ฃนํ
+    rag_items = [i for i in evidence_items if i.get("source_type") == "rag"]
+    api_items = [i for i in evidence_items if i.get("source_type") == "api"]
+    llm_items = [i for i in evidence_items if i.get("source_type") == "llm_generated"]
+
+    lines: list[str] = ["── ์ฐธ์กฐ ๊ทผ๊ฑฐ ──"]
+    idx = 1
+
+    if rag_items:
+        lines.append("[๋ก์ปฌ ๋ฌธ์]")
+        for item in rag_items:
+            title = item.get("title") or item.get("link_or_path", "")
+            page = item.get("page")
+            score = item.get("score", 0.0)
+            page_str = f" (p.{page})" if page is not None else ""
+            score_str = f" [{score:.2f}]" if score else ""
+            lines.append(f" {idx}. {title}{page_str}{score_str}")
+            idx += 1
+
+    if api_items:
+        lines.append("[์ธ๋ถ API]")
+        for item in api_items:
+            title = item.get("title", "")
+            link = item.get("link_or_path", "")
+            link_str = f" → {link}" if link else ""
+            lines.append(f" {idx}. {title}{link_str}")
+            idx += 1
+
+    if llm_items:
+        lines.append("[LLM ์์ฑ]")
+        for item in llm_items:
+            title = item.get("title", "")
+            excerpt = item.get("excerpt", "")[:80]
+            lines.append(f" {idx}. {title}: {excerpt}" if title else f" {idx}. {excerpt}")
+            idx += 1
+
+    return "\n".join(lines) if len(lines) > 1 else ""
+
+
+def render_result(result: dict) -> None:
+    """Render the final agent response to the terminal.
+
+    Expected keys (at least one required):
+    - result["text"] or result["response"]: main answer text
+    - result["evidence_items"]: EvidenceItem dict ๋ฆฌ์คํธ (structured, ์ฐ์ )
+    - result["citations"] or result["sources"]: list of source strings (fallback)
+    """
+    text_body: str = result.get("text") or result.get("response") or ""
+    evidence_items: list = result.get("evidence_items") or []
+    citations: list = result.get("citations") or result.get("sources") or []
+
+    use_rich, columns = _resolve_render_mode()
+
+    if use_rich:
+        content = Text(text_body)
+        if evidence_items:
+            evidence_text = render_evidence_section(evidence_items)
+            if evidence_text:
+                content.append(f"\n\n{evidence_text}\n", style="dim")
+        elif citations:
+            content.append("\n\n์ถ์ฒ\n", style="bold")
+            for idx, src in enumerate(citations, 1):
+                content.append(f" {idx}. {src}\n", style="dim")
+        _console.print(
+            Panel(
+                content,
+                title="[bold green]GovOn[/bold green]",
+                border_style="green",
+                width=get_panel_width(columns),
+            )
+        )
+    else:
+        rule = _plain_rule(columns)
+        print(f"\n{rule}")
+        print("GovOn")
+        print(text_body)
+        if evidence_items:
+            evidence_text = render_evidence_section(evidence_items)
+            if evidence_text:
+                print(f"\n{evidence_text}")
+        elif citations:
+            print("\n์ถ์ฒ")
+            for idx, src in enumerate(citations, 1):
+                print(f" {idx}. {src}")
+        print(f"{rule}\n")
+
+
+def render_status(message: str) -> None:
+    """Render a transient status / progress message."""
+    use_rich, _ = _resolve_render_mode()
+    if use_rich:
+        _console.print(f"[dim]โ {message}[/dim]")
+    else:
+        print(f"โ {message}")
+
+
+def render_error(message: str) -> None:
+    """Render an error message in red."""
+    use_rich, _ = _resolve_render_mode()
+    if use_rich:
+        _console.print(f"[bold red]์ค๋ฅ:[/bold red] {message}")
+    else:
+        print(f"์ค๋ฅ: {message}")
+
+
+def render_session_info(session_id: str) -> None:
+    """Render session resume hint at shell exit."""
+    hint = f"[session: {session_id}] govon --session {session_id} ๋ก ์ฌ๊ฐ ๊ฐ๋ฅ"
+    use_rich, _ = _resolve_render_mode()
+    if use_rich:
+        _console.print(f"[dim]{hint}[/dim]")
+    else:
+        print(hint)
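
Rendering sketch for render_result above. The text / evidence_items keys and the rag / api source_type values come from the docstrings in the diff; the sample records (titles, page, score, URL) are invented for illustration.

from src.cli.renderer import render_result

render_result({
    "text": "sample answer body",
    "evidence_items": [
        {"source_type": "rag", "title": "guide.pdf", "page": 3, "score": 0.87},
        {"source_type": "api", "title": "data.go.kr", "link_or_path": "https://www.data.go.kr"},
    ],
})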
src/cli/shell.py
ADDED
@@ -0,0 +1,420 @@
+"""GovOn CLI - main REPL loop and entry point.
+
+Entry point registered in pyproject.toml:
+    [project.scripts]
+    govon = "src.cli.shell:main"
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+
+import httpx
+
+# ---------------------------------------------------------------------------
+# Optional dependencies - graceful degradation
+# ---------------------------------------------------------------------------
+_PT_AVAILABLE = False
+try:
+    from prompt_toolkit import PromptSession
+    from prompt_toolkit.history import InMemoryHistory
+
+    _PT_AVAILABLE = True
+except ImportError:  # pragma: no cover
+    pass
+
+# ---------------------------------------------------------------------------
+# Internal modules
+# ---------------------------------------------------------------------------
+from src.cli.approval_ui import show_approval_prompt
+from src.cli.commands import handle_command, is_command
+from src.cli.renderer import (
+    StreamingStatusDisplay,
+    get_node_message,
+    render_error,
+    render_result,
+    render_session_info,
+    render_status,
+)
+
+# ---------------------------------------------------------------------------
+# Stub imports for daemon / http_client (other agents implement these).
+# If the real modules exist they are used; otherwise lightweight stubs
+# are defined inline so the shell can be imported and tested standalone.
+# ---------------------------------------------------------------------------
+try:
+    from src.cli.daemon import DaemonManager  # type: ignore[import]
+except ImportError:  # pragma: no cover
+
+    class DaemonManager:  # type: ignore[no-redef]
+        """Stub: real implementation provided by daemon.py agent."""
+
+        def ensure_running(self) -> str:
+            raise RuntimeError("DaemonManager not available. Install the full GovOn package.")
+
+        def is_running(self) -> bool:
+            return False
+
+        def stop(self) -> None:
+            pass
+
+
+try:
+    from src.cli.http_client import GovOnClient  # type: ignore[import]
+except ImportError:  # pragma: no cover
+
+    class GovOnClient:  # type: ignore[no-redef]
+        """Stub: real implementation provided by http_client.py agent."""
+
+        def __init__(self, base_url: str) -> None:
+            self._base_url = base_url
+
+        def run(self, query: str, session_id: str | None = None) -> dict:
+            raise RuntimeError("GovOnClient not available. Install the full GovOn package.")
+
+        def stream(self, query: str, session_id: str | None = None):
+            raise RuntimeError("GovOnClient not available. Install the full GovOn package.")
+            yield  # make it a generator
+
+        def approve(self, thread_id: str, approved: bool) -> dict:
+            raise RuntimeError("GovOnClient not available. Install the full GovOn package.")
+
+        def cancel(self, thread_id: str) -> dict:
+            raise RuntimeError("GovOnClient not available. Install the full GovOn package.")
+
+        def health(self) -> dict:
+            raise RuntimeError("GovOnClient not available. Install the full GovOn package.")
+
+
+# ---------------------------------------------------------------------------
+# Core helpers
+# ---------------------------------------------------------------------------
+
+_PROMPT_TEXT = "govon> "
+
+
+def _get_input(session: "PromptSession | None") -> str:  # type: ignore[name-defined]
+    """Read one line of user input (prompt_toolkit or plain input())."""
+    if _PT_AVAILABLE and session is not None:
+        return session.prompt(_PROMPT_TEXT)
+    return input(_PROMPT_TEXT)
+
+
+def _process_query(
+    client: "GovOnClient",
+    query: str,
+    session_id: str | None,
+) -> tuple[str | None, bool]:
+    """Send *query* to the backend and handle approval flow.
+
+    Attempts to use the streaming endpoint (/v2/agent/stream) for per-node
+    progress display. Falls back to the blocking run() call when the streaming
+    endpoint is unavailable.
+
+    Returns (new_session_id, should_continue).
+    `should_continue` is False only when an unrecoverable error is returned
+    that suggests the daemon is down.
+    """
+    # --- Try streaming path first ---
+    try:
+        return _process_query_streaming(client, query, session_id)
+    except (AttributeError, NotImplementedError):
+        # client.stream() is not available (stub or older server)
+        pass
+    except (ConnectionError, httpx.HTTPStatusError, httpx.StreamError, OSError):
+        # Streaming endpoint unavailable - fall back silently
+        pass
+
+    # --- Fallback: blocking run() with simple spinner ---
+    return _process_query_blocking(client, query, session_id)
+
+
+def _process_query_streaming(
+    client: "GovOnClient",
+    query: str,
+    session_id: str | None,
+) -> tuple[str | None, bool]:
+    """Streaming path: calls client.stream() and shows per-node progress."""
+    final_response: dict = {}
+    approval_event: dict | None = None
+    new_session_id: str | None = None
+
+    with StreamingStatusDisplay("์ฒ๋ฆฌ ์ค…") as status_display:
+        for event in client.stream(query, session_id):
+            node: str = event.get("node", "")
+            event_status: str = event.get("status", "")
+
+            if node == "error" or event_status == "error":
+                render_error(event.get("error", "์ ์ ์๋ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค."))
+                return session_id, True
+
+            if event_status == "awaiting_approval":
+                approval_event = event
+                break
+
+            # Update spinner with node-specific message
+            if node:
+                msg = get_node_message(node)
+                status_display.update(msg)
+
+            # Collect session/thread id from any event
+            if not new_session_id:
+                new_session_id = event.get("session_id") or event.get("thread_id")
+
+            # Collect final result if present
+            if event_status == "completed" or event.get("final_text") or event.get("text"):
+                final_response = event
+
+    # Handle approval
+    if approval_event is not None:
+        if not new_session_id:
+            new_session_id = approval_event.get("session_id") or approval_event.get("thread_id")
+        approval_request: dict = approval_event.get("approval_request") or {}
+        approved = show_approval_prompt(approval_request)
+        thread_id: str = approval_event.get("thread_id") or ""
+
+        if not approved:
+            try:
+                client.approve(thread_id, approved=False)
+            except Exception:  # pragma: no cover
+                pass
+            return new_session_id or session_id, True
+
+        render_status("์น์ธ๋จ → ๊ณ์ ์งํ ์ค…")
+        try:
+            approved_response = client.approve(thread_id, approved=True)
+        except Exception as exc:  # pragma: no cover
+            render_error(f"์น์ธ ์์ฒญ ์คํจ: {exc}")
+            return new_session_id or session_id, True
+
+        render_result(approved_response)
+        return (
+            approved_response.get("session_id")
+            or approved_response.get("thread_id")
+            or new_session_id
+            or session_id,
+            True,
+        )
+
+    # Handle completed result from streaming events
+    if final_response:
+        _sid = final_response.get("session_id") or final_response.get("thread_id") or new_session_id
+        render_result(final_response)
+        return _sid or session_id, True
+
+    # No useful response received
+    render_result({"text": ""})
+    return new_session_id or session_id, True
+
+
+def _process_query_blocking(
+    client: "GovOnClient",
+    query: str,
+    session_id: str | None,
+) -> tuple[str | None, bool]:
+    """Blocking fallback path: calls client.run() with a simple spinner."""
+    render_status("์ฒ๋ฆฌ ์ค…")
+
+    try:
+        response = client.run(query, session_id)
+    except Exception as exc:  # pragma: no cover
+        render_error(f"์์ฒญ ์คํจ: {exc}")
+        return session_id, True
+
+    new_session_id: str | None = response.get("session_id") or response.get("thread_id")
+    status: str = response.get("status", "")
+
+    if status == "awaiting_approval":
+        approval_request: dict = response.get("approval_request") or {}
+        approved = show_approval_prompt(approval_request)
+
+        if not approved:
+            # ๊ฑฐ์ : ์๋ฒ์ ํต๋ณด ํ ํ๋กฌํํธ ๋ณต๊ท
+            _thread_id: str = response.get("thread_id") or ""
+            try:
+                client.approve(_thread_id, approved=False)
+            except Exception:  # pragma: no cover
+                pass
+            return new_session_id or session_id, True
+
+        thread_id: str = response.get("thread_id") or ""
+        render_status("์น์ธ๋จ → ๊ณ์ ์งํ ์ค…")
+        try:
+            approved_response = client.approve(thread_id, approved=True)
+        except Exception as exc:  # pragma: no cover
+            render_error(f"์น์ธ ์์ฒญ ์คํจ: {exc}")
+            return new_session_id or session_id, True
+
+        render_result(approved_response)
+        return (
+            approved_response.get("session_id")
+            or approved_response.get("thread_id")
+            or new_session_id
+            or session_id,
+            True,
+        )
+
+    if status in ("completed", "done", "success") or "text" in response or "response" in response:
+        render_result(response)
+        return new_session_id or session_id, True
+
+    # Unknown status - render raw
+    render_result({"text": str(response)})
+    return new_session_id or session_id, True
+
+
+# ---------------------------------------------------------------------------
+# REPL loop
+# ---------------------------------------------------------------------------
+
+
+def _run_repl(client: "GovOnClient", initial_session_id: str | None = None) -> None:
+    """Run the interactive REPL until EOF or /exit."""
+    session_id: str | None = initial_session_id
+    pt_session = PromptSession(history=InMemoryHistory()) if _PT_AVAILABLE else None
|
| 277 |
+
|
| 278 |
+
while True:
|
| 279 |
+
try:
|
| 280 |
+
text = _get_input(pt_session).strip()
|
| 281 |
+
except EOFError:
|
| 282 |
+
# Ctrl+D
|
| 283 |
+
break
|
| 284 |
+
except KeyboardInterrupt:
|
| 285 |
+
# Ctrl+C while idle โ exit
|
| 286 |
+
print()
|
| 287 |
+
break
|
| 288 |
+
|
| 289 |
+
if not text:
|
| 290 |
+
continue
|
| 291 |
+
|
| 292 |
+
if is_command(text):
|
| 293 |
+
try:
|
| 294 |
+
result = handle_command(text)
|
| 295 |
+
except SystemExit:
|
| 296 |
+
break
|
| 297 |
+
if result is not None:
|
| 298 |
+
print(result)
|
| 299 |
+
continue
|
| 300 |
+
|
| 301 |
+
# Normal query
|
| 302 |
+
try:
|
| 303 |
+
session_id, should_continue = _process_query(client, text, session_id)
|
| 304 |
+
except KeyboardInterrupt:
|
| 305 |
+
# Ctrl+C while processing โ cancel and return to prompt
|
| 306 |
+
print("\n์์ฒญ์ด ์ทจ์๋์์ต๋๋ค.")
|
| 307 |
+
continue
|
| 308 |
+
|
| 309 |
+
if not should_continue: # pragma: no cover
|
| 310 |
+
break
|
| 311 |
+
|
| 312 |
+
if session_id:
|
| 313 |
+
render_session_info(session_id)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# ---------------------------------------------------------------------------
|
| 317 |
+
# Single-shot mode
|
| 318 |
+
# ---------------------------------------------------------------------------
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _run_once(client: "GovOnClient", query: str, session_id: str | None) -> None:
|
| 322 |
+
"""Run a single query and exit."""
|
| 323 |
+
new_session_id, _ = _process_query(client, query, session_id)
|
| 324 |
+
if new_session_id:
|
| 325 |
+
render_session_info(new_session_id)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
# ---------------------------------------------------------------------------
|
| 329 |
+
# Entry point
|
| 330 |
+
# ---------------------------------------------------------------------------
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def main() -> None:
|
| 334 |
+
"""CLI entry point for the `govon` command."""
|
| 335 |
+
parser = argparse.ArgumentParser(
|
| 336 |
+
prog="govon",
|
| 337 |
+
description="GovOn โ shell-first local agentic runtime",
|
| 338 |
+
formatter_class=argparse.RawTextHelpFormatter,
|
| 339 |
+
)
|
| 340 |
+
parser.add_argument(
|
| 341 |
+
"query",
|
| 342 |
+
nargs="?",
|
| 343 |
+
default=None,
|
| 344 |
+
help="๋จ๋ฐ ์คํํ ์ง๋ฌธ (์๋ต ์ ์ธํฐ๋ํฐ๋ธ REPL ๋ชจ๋)",
|
| 345 |
+
)
|
| 346 |
+
parser.add_argument(
|
| 347 |
+
"--session",
|
| 348 |
+
metavar="SESSION_ID",
|
| 349 |
+
default=None,
|
| 350 |
+
help="์ฌ๊ฐํ ๊ธฐ์กด ์ธ์
ID",
|
| 351 |
+
)
|
| 352 |
+
parser.add_argument(
|
| 353 |
+
"--status",
|
| 354 |
+
action="store_true",
|
| 355 |
+
help="daemon ์ํ ํ์ธ ํ ์ข
๋ฃ",
|
| 356 |
+
)
|
| 357 |
+
parser.add_argument(
|
| 358 |
+
"--stop",
|
| 359 |
+
action="store_true",
|
| 360 |
+
help="daemon ์ค์ง ํ ์ข
๋ฃ",
|
| 361 |
+
)
|
| 362 |
+
|
| 363 |
+
args = parser.parse_args()
|
| 364 |
+
|
| 365 |
+
# GOVON_RUNTIME_URL์ด ์ค์ ๋ ๊ฒฝ์ฐ ์๊ฒฉ ์๋ฒ์ ์ง์ ์ฐ๊ฒฐํ๊ณ daemon์ ๊ด๋ฆฌํ์ง ์๋๋ค.
|
| 366 |
+
runtime_url = os.environ.get("GOVON_RUNTIME_URL")
|
| 367 |
+
|
| 368 |
+
if runtime_url:
|
| 369 |
+
if not runtime_url.startswith(("http://", "https://")):
|
| 370 |
+
print(
|
| 371 |
+
f"์ค๋ฅ: GOVON_RUNTIME_URL์ http:// ๋๋ https://๋ก ์์ํด์ผ ํฉ๋๋ค: {runtime_url}",
|
| 372 |
+
file=sys.stderr,
|
| 373 |
+
)
|
| 374 |
+
sys.exit(1)
|
| 375 |
+
# ์๊ฒฉ ๋ฐํ์ ๋ชจ๋: daemon ๊ด๋ฆฌ ์์ด ์ง์ ๋ URL์ ์ง์ ์ฐ๊ฒฐ
|
| 376 |
+
if args.status:
|
| 377 |
+
print(f"GovOn daemon: ์๊ฒฉ ๋ชจ๋ (GOVON_RUNTIME_URL={runtime_url})")
|
| 378 |
+
sys.exit(0)
|
| 379 |
+
if args.stop:
|
| 380 |
+
print("์ค๋ฅ: ์๊ฒฉ ๋ฐํ์ ๋ชจ๋์์๋ --stop์ ์ฌ์ฉํ ์ ์์ต๋๋ค.", file=sys.stderr)
|
| 381 |
+
sys.exit(1)
|
| 382 |
+
base_url = runtime_url.rstrip("/")
|
| 383 |
+
else:
|
| 384 |
+
# ๋ก์ปฌ daemon ๋ชจ๋
|
| 385 |
+
daemon = DaemonManager()
|
| 386 |
+
|
| 387 |
+
# --status
|
| 388 |
+
if args.status:
|
| 389 |
+
if daemon.is_running():
|
| 390 |
+
print("GovOn daemon: ์คํ ์ค")
|
| 391 |
+
else:
|
| 392 |
+
print("GovOn daemon: ์ค์ง๋จ")
|
| 393 |
+
sys.exit(0)
|
| 394 |
+
|
| 395 |
+
# --stop
|
| 396 |
+
if args.stop:
|
| 397 |
+
daemon.stop()
|
| 398 |
+
print("GovOn daemon์ด ์ค์ง๋์์ต๋๋ค.")
|
| 399 |
+
sys.exit(0)
|
| 400 |
+
|
| 401 |
+
# Ensure daemon is up and get base URL
|
| 402 |
+
try:
|
| 403 |
+
base_url = daemon.ensure_running()
|
| 404 |
+
except Exception as exc:
|
| 405 |
+
print(f"์ค๋ฅ: daemon์ ์์ํ ์ ์์ต๋๋ค โ {exc}", file=sys.stderr)
|
| 406 |
+
sys.exit(1)
|
| 407 |
+
|
| 408 |
+
client = GovOnClient(base_url)
|
| 409 |
+
|
| 410 |
+
if args.query:
|
| 411 |
+
# Single-shot mode
|
| 412 |
+
_run_once(client, args.query, args.session)
|
| 413 |
+
else:
|
| 414 |
+
# Interactive REPL mode
|
| 415 |
+
print("GovOn CLI (์ข
๋ฃ: Ctrl+D ๋๋ /exit)")
|
| 416 |
+
_run_repl(client, initial_session_id=args.session)
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
if __name__ == "__main__":
|
| 420 |
+
main()
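The fallback contract above can be exercised without a live daemon. The sketch below is illustrative only: StubClient is a made-up stand-in (the real client is GovOnClient from src/cli/http_client) whose stream() raises NotImplementedError, so the dispatcher silently takes the blocking run() path, mirroring _process_query.

# Sketch of the try-streaming-then-fall-back dispatch with a stub backend.
class StubClient:
    def stream(self, query: str, session_id: str | None):
        # Simulates a stub or older server without /v2/agent/stream
        raise NotImplementedError("streaming endpoint not available")

    def run(self, query: str, session_id: str | None) -> dict:
        return {"status": "completed", "text": f"echo: {query}", "session_id": "s-1"}


def process(client, query: str, session_id: str | None = None) -> tuple[str | None, bool]:
    try:
        for event in client.stream(query, session_id):  # streaming path first
            print(event)
        return session_id, True
    except (AttributeError, NotImplementedError):
        pass  # stream() missing or unimplemented: fall back silently
    response = client.run(query, session_id)  # blocking fallback
    return response.get("session_id") or session_id, True


print(process(StubClient(), "hello"))  # ('s-1', True)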
src/cli/terminal.py
ADDED
@@ -0,0 +1,56 @@
"""Terminal layout helpers for the GovOn CLI."""

from __future__ import annotations

import shutil

DEFAULT_TERMINAL_COLUMNS = 80
MIN_TERMINAL_COLUMNS = 40
MIN_CONTENT_WIDTH = 20
APPROVAL_BOX_MAX_WIDTH = 55
APPROVAL_BOX_MARGIN = 4
PANEL_MARGIN = 2

assert MIN_CONTENT_WIDTH < MIN_TERMINAL_COLUMNS


def get_terminal_columns(default: int = DEFAULT_TERMINAL_COLUMNS) -> int:
    """Return the current terminal width in columns."""
    return max(shutil.get_terminal_size(fallback=(default, 24)).columns, 1)


def _cols(columns: int | None) -> int:
    """Resolve an explicit column override or read the current terminal width."""
    return get_terminal_columns() if columns is None else columns


def is_layout_supported(columns: int | None = None) -> bool:
    """Return True when the terminal is wide enough for full rich layouts."""
    return _cols(columns) >= MIN_TERMINAL_COLUMNS


def get_approval_box_width(columns: int | None = None) -> int:
    """Return the inner width for the approval box.

    Callers should gate rich box rendering with `is_layout_supported()` first.
    For very narrow terminals, plain fallback is the supported rendering path.
    """
    current_columns = _cols(columns)
    return max(
        MIN_CONTENT_WIDTH,
        min(APPROVAL_BOX_MAX_WIDTH, current_columns - APPROVAL_BOX_MARGIN),
    )


def get_panel_width(columns: int | None = None) -> int:
    """Return the rich panel width for result rendering."""
    return max(MIN_CONTENT_WIDTH, _cols(columns) - PANEL_MARGIN)


def get_narrow_terminal_warning(columns: int | None = None) -> str:
    """Return the warning shown when the terminal is too narrow."""
    current_columns = _cols(columns)
    return (
        f"터미널 너비가 {current_columns}열로 좁아 plain mode로 전환합니다. "
        f"최소 {MIN_TERMINAL_COLUMNS}열 이상에서 전체 레이아웃이 보장됩니다."
    )
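A quick sanity check of the clamping arithmetic above, assuming the repo root is on sys.path so src.cli.terminal imports as written; the expected values follow directly from the module constants.

from src.cli.terminal import (
    get_approval_box_width,
    get_panel_width,
    is_layout_supported,
)

for cols in (30, 40, 120):
    print(cols, is_layout_supported(cols), get_approval_box_width(cols), get_panel_width(cols))
# 30  False 26 28   (below MIN_TERMINAL_COLUMNS: callers should fall back to plain mode)
# 40  True  36 38   (box width is cols - APPROVAL_BOX_MARGIN, panel is cols - PANEL_MARGIN)
# 120 True  55 118  (box width caps at APPROVAL_BOX_MAX_WIDTH)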
src/data_collection_preprocessing/__init__.py
ADDED
@@ -0,0 +1,13 @@
"""Training-data collection and preprocessing package for the civil-complaint response adapter."""

from .config import DataConfig
from .parsers import AdminLawParser, GovQAParser, GukripParser
from .pipeline import CivilResponseDataPipeline

__all__ = [
    "DataConfig",
    "GukripParser",
    "GovQAParser",
    "AdminLawParser",
    "CivilResponseDataPipeline",
]
src/data_collection_preprocessing/__main__.py
ADDED
@@ -0,0 +1,5 @@
"""Entry point for `python -m src.data_collection_preprocessing`."""

from .cli import main

main()
src/data_collection_preprocessing/cli.py
ADDED
@@ -0,0 +1,76 @@
"""CLI entry point: python -m src.data_collection_preprocessing"""

from __future__ import annotations

import argparse
import logging
import sys
from pathlib import Path

from .config import DataConfig
from .pipeline import CivilResponseDataPipeline


def main() -> None:
    parser = argparse.ArgumentParser(description="민원답변 어댑터 학습 데이터 파이프라인")
    parser.add_argument(
        "--raw-dir",
        default="data/raw/aihub",
        help="AI Hub 원시 데이터 루트 디렉터리 (기본: data/raw/aihub)",
    )
    parser.add_argument(
        "--output-dir",
        default="data/processed",
        help="출력 디렉터리 (기본: data/processed)",
    )
    parser.add_argument(
        "--min-answer-length",
        type=int,
        default=30,
        help="최소 답변 길이 (기본: 30자)",
    )
    parser.add_argument(
        "--max-answer-length",
        type=int,
        default=4096,
        help="최대 답변 길이 (기본: 4096자)",
    )
    parser.add_argument(
        "--train-ratio",
        type=float,
        default=0.9,
        help="train 비율 (기본: 0.9)",
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        stream=sys.stdout,
    )

    config = DataConfig(
        raw_dir=Path(args.raw_dir),
        output_dir=Path(args.output_dir),
        min_answer_length=args.min_answer_length,
        max_answer_length=args.max_answer_length,
        train_ratio=args.train_ratio,
    )

    pipeline = CivilResponseDataPipeline(config)
    stats = pipeline.run()

    print("\n파이프라인 완료")
    print(f"  총 레코드: {stats['total']:,}")
    print(f"  train: {stats['train']:,}")
    print(f"  val: {stats['val']:,}")
    print(f"  출력 경로: {args.output_dir}/")


if __name__ == "__main__":
    main()
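For reference, the same pipeline can be driven in-process instead of via the shell; this sketch patches sys.argv rather than shelling out and assumes the raw AI Hub dumps already exist under --raw-dir.

# Equivalent to:
#   python -m src.data_collection_preprocessing --raw-dir data/raw/aihub --log-level DEBUG
import sys
from unittest import mock

from src.data_collection_preprocessing.cli import main

argv = ["prog", "--raw-dir", "data/raw/aihub", "--output-dir", "data/processed", "--log-level", "DEBUG"]
with mock.patch.object(sys, "argv", argv):
    main()  # runs parse -> dedup -> filter -> split and prints the summary block above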
src/data_collection_preprocessing/config.py
ADDED
@@ -0,0 +1,14 @@
"""Data pipeline settings."""

from dataclasses import dataclass
from pathlib import Path


@dataclass
class DataConfig:
    raw_dir: Path = Path("data/raw/aihub")
    output_dir: Path = Path("data/processed")
    min_answer_length: int = 30
    max_answer_length: int = 4096
    min_question_length: int = 5
    train_ratio: float = 0.9
src/data_collection_preprocessing/parsers.py
ADDED
@@ -0,0 +1,235 @@
"""Parsers for the AI Hub datasets.

Each parser reads a single JSON file and returns a list of training records
in the following shape:
    {
        "question": str,
        "answer": str,
        "source": str,
        "category": str,
        "metadata": dict,
    }
"""

from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Any


def _load_json(filepath: Path) -> Any:
    with open(filepath, encoding="utf-8") as f:
        return json.load(f)


class GukripParser:
    """Parser for the 71852 국립의약소문화재단비 corpus.

    Extracts the counselor ("상담사:") turns from consulting_content as the
    answer and uses instructions[0].data[0].instruction as the question.
    """

    def parse(self, filepath: Path) -> list[dict]:
        data = _load_json(filepath)
        if isinstance(data, list):
            records = []
            for item in data:
                records.extend(self._parse_item(item))
            return records
        return self._parse_item(data)

    def _parse_item(self, item: dict) -> list[dict]:
        content: str = item.get("consulting_content", "")
        source_id: str = item.get("source_id", "")
        consulting_date: str = item.get("consulting_date", "")
        category: str = item.get("consulting_category", "")

        # Extract the counselor turns
        agent_turns = []
        for line in content.split("\n"):
            line = line.strip()
            if line.startswith("상담사:"):
                turn_text = line[len("상담사:") :].strip()
                if turn_text:
                    agent_turns.append(turn_text)

        if not agent_turns:
            return []

        answer = " ".join(agent_turns)

        # Extract the question from the instruction block
        instructions = item.get("instructions", [])
        if not instructions:
            return []

        data_list = instructions[0].get("data", [])
        if not data_list:
            return []

        question = data_list[0].get("instruction", "").strip()
        if not question:
            return []

        return [
            {
                "question": question,
                "answer": answer,
                "source": "71852_국립의약소문화재단비",
                "category": category,
                "metadata": {
                    "source_id": source_id,
                    "consulting_date": consulting_date,
                },
            }
        ]


class GovQAParser:
    """Parser for the 71852 central administrative agency (중앙행정기관) corpus.

    Parses the Q/A layout inside consulting_content to extract the official
    government answer. Auxiliary questions (instructions.data[*].instruction)
    become separate records.
    """

    # Answer separator pattern: "\nA :" or "\nA:"
    _A_SEP = re.compile(r"\nA\s*:")

    def parse(self, filepath: Path) -> list[dict]:
        data = _load_json(filepath)
        if isinstance(data, list):
            records = []
            for item in data:
                records.extend(self._parse_item(item))
            return records
        return self._parse_item(data)

    def _parse_item(self, item: dict) -> list[dict]:
        content: str = item.get("consulting_content", "")
        source_str: str = item.get("source", "")
        source_id: str = item.get("source_id", "")
        consulting_date: str = item.get("consulting_date", "")
        category: str = item.get("consulting_category", "")

        # Split off the answer part
        parts = self._A_SEP.split(content, maxsplit=1)
        if len(parts) < 2:
            return []

        q_part, a_part = parts[0], parts[1].strip()
        if not a_part:
            return []

        # Extract the question from the Q part
        question = self._extract_question(q_part)
        if not question:
            return []

        records = [
            {
                "question": question,
                "answer": a_part,
                "source": "71852_중앙행정기관",
                "category": category,
                "metadata": {
                    "source_id": source_id,
                    "consulting_date": consulting_date,
                    "org": source_str,
                },
            }
        ]

        # Create additional records from the auxiliary questions
        # (instructions.data[*].instruction)
        instructions = item.get("instructions", [])
        if instructions:
            for instr_item in instructions[0].get("data", []):
                sub_q = instr_item.get("instruction", "").strip()
                if sub_q and sub_q != question:
                    records.append(
                        {
                            "question": sub_q,
                            "answer": a_part,
                            "source": "71852_중앙행정기관",
                            "category": category,
                            "metadata": {
                                "source_id": source_id,
                                "consulting_date": consulting_date,
                                "org": source_str,
                                "question_type": "auxiliary",
                            },
                        }
                    )

        return records

    @staticmethod
    def _extract_question(q_part: str) -> str:
        """Extract the question text from the Q block."""
        # Text after "Q :" or "Q:"
        q_match = re.search(r"\nQ\s*:(.*?)(?=\n\n|\Z)", q_part, re.DOTALL)
        if q_match:
            return q_match.group(1).strip()

        # Fallback: text after "제목 :" (title)
        title_match = re.search(r"제목\s*:\s*(.+)", q_part)
        if title_match:
            return title_match.group(1).strip()

        return q_part.strip()


class GovQALocalParser(GovQAParser):
    """Parser for the 71852 local administrative agency (지방행정기관) corpus.

    Same logic as GovQAParser; only the source label differs.
    """

    def _parse_item(self, item: dict) -> list[dict]:
        records = super()._parse_item(item)
        for r in records:
            r["source"] = "71852_지방행정기관"
        return records


class AdminLawParser:
    """Parser for the 71847 administrative-law corpus.

    Uses label.input as the question and label.output as the answer.
    Decision precedents (TL_결정례_QA) and statutes (TL_법령_QA) share the
    same structure.
    """

    def __init__(self, source_label: str = "71847_결정례"):
        self.source_label = source_label

    def parse(self, filepath: Path) -> list[dict]:
        data = _load_json(filepath)
        if isinstance(data, list):
            records = []
            for item in data:
                records.extend(self._parse_item(item))
            return records
        return self._parse_item(data)

    def _parse_item(self, item: dict) -> list[dict]:
        label = item.get("label", {})
        question = label.get("input", "").strip()
        answer = label.get("output", "").strip()

        if not question or not answer:
            return []

        info = item.get("info", {})
        case_name = info.get("caseName", info.get("title", ""))
        category = info.get("ministry", info.get("caseCode", ""))

        return [
            {
                "question": question,
                "answer": answer,
                "source": self.source_label,
                "category": category,
                "metadata": {
                    "case_name": case_name,
                    "law_class": info.get("lawClass", ""),
                },
            }
        ]
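A minimal round trip for GukripParser on a synthetic record whose shape follows the module docstring; every field value here is invented, and only the "상담사:" marker matters to the parser.

import json
import tempfile
from pathlib import Path

from src.data_collection_preprocessing.parsers import GukripParser

sample = {
    "consulting_content": "민원인: 문의드립니다\n상담사: 안내 내용입니다",
    "source_id": "demo-001",
    "consulting_date": "2024-01-01",
    "consulting_category": "demo",
    "instructions": [{"data": [{"instruction": "샘플 질문"}]}],
}
with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "sample.json"
    path.write_text(json.dumps(sample, ensure_ascii=False), encoding="utf-8")
    records = GukripParser().parse(path)

print(records[0]["question"], "->", records[0]["answer"])  # 샘플 질문 -> 안내 내용입니다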
src/data_collection_preprocessing/pipeline.py
ADDED
@@ -0,0 +1,177 @@
"""Training-data pipeline for civil-complaint responses."""

from __future__ import annotations

import hashlib
import json
import logging
import random
from pathlib import Path
from typing import Any

from .config import DataConfig
from .parsers import AdminLawParser, GovQALocalParser, GovQAParser, GukripParser

logger = logging.getLogger(__name__)


class CivilResponseDataPipeline:
    """Converts raw AI Hub data into instruction-tuning JSONL."""

    INSTRUCTION_TEXT = "다음 민원에 대한 답변을 작성해 주세요."

    def __init__(self, config: DataConfig | None = None):
        self.config = config or DataConfig()

    def run(self) -> dict[str, int]:
        """Run the full pipeline and return summary statistics."""
        records: list[dict] = []

        logger.info("71852 데이터 처리 시작")
        records_71852 = self._process_71852()
        logger.info("71852 데이터 %d개 수집", len(records_71852))
        records.extend(records_71852)

        logger.info("71847 데이터 처리 시작")
        records_71847 = self._process_71847()
        logger.info("71847 데이터 %d개 수집", len(records_71847))
        records.extend(records_71847)

        logger.info("중복 제거 전 총 %d개", len(records))
        records = self._deduplicate(records)
        logger.info("중복 제거 후 %d개", len(records))

        records = self._filter(records)
        logger.info("필터링 후 %d개", len(records))

        train, val = self._split(records)
        logger.info("train=%d, val=%d", len(train), len(val))

        output_dir = Path(self.config.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        self._save_jsonl(train, output_dir / "train.jsonl")
        self._save_jsonl(val, output_dir / "val.jsonl")

        return {
            "total": len(records),
            "train": len(train),
            "val": len(val),
        }

    # ------------------------------------------------------------------
    # Per-dataset processing
    # ------------------------------------------------------------------

    def _process_71852(self) -> list[dict]:
        base = Path(self.config.raw_dir) / "71852"
        records: list[dict] = []

        # 국립의약소문화재단비
        gukrp = GukripParser()
        for split in ("train", "val"):
            dir_path = base / split / "국립"
            if dir_path.exists():
                records.extend(self._parse_dir(gukrp, dir_path))

        # Central administrative agencies
        gov_central = GovQAParser()
        for split in ("train", "val"):
            dir_path = base / split / "중앙"
            if dir_path.exists():
                records.extend(self._parse_dir(gov_central, dir_path))

        # Local administrative agencies
        gov_local = GovQALocalParser()
        for split in ("train", "val"):
            dir_path = base / split / "지방"
            if dir_path.exists():
                records.extend(self._parse_dir(gov_local, dir_path))

        return records

    def _process_71847(self) -> list[dict]:
        base = Path(self.config.raw_dir) / "71847"
        records: list[dict] = []

        # Decision-precedent QA
        decision_parser = AdminLawParser(source_label="71847_결정례")
        dir_path = base / "TL_결정례_QA"
        if dir_path.exists():
            records.extend(self._parse_dir(decision_parser, dir_path))

        # Statute QA
        law_parser = AdminLawParser(source_label="71847_법령")
        dir_path = base / "TL_법령_QA"
        if dir_path.exists():
            records.extend(self._parse_dir(law_parser, dir_path))

        return records

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_dir(parser: Any, dir_path: Path) -> list[dict]:
        records: list[dict] = []
        json_files = list(dir_path.glob("*.json"))
        logger.debug("  %s: %d 파일", dir_path, len(json_files))
        for filepath in json_files:
            try:
                records.extend(parser.parse(filepath))
            except Exception as exc:  # noqa: BLE001
                logger.warning("파싱 실패 %s: %s", filepath, exc)
        return records

    def _deduplicate(self, records: list[dict]) -> list[dict]:
        """Remove duplicates keyed on a hash of question + answer."""
        seen: set[str] = set()
        unique: list[dict] = []
        for rec in records:
            key = hashlib.md5(  # nosec B324
                (rec["question"] + rec["answer"]).encode("utf-8"),
                usedforsecurity=False,
            ).hexdigest()
            if key not in seen:
                seen.add(key)
                unique.append(rec)
        return unique

    def _filter(self, records: list[dict]) -> list[dict]:
        """Length-based filtering."""
        filtered: list[dict] = []
        for rec in records:
            answer_len = len(rec["answer"])
            question_len = len(rec["question"])
            if answer_len < self.config.min_answer_length:
                continue
            if answer_len > self.config.max_answer_length:
                continue
            if question_len < self.config.min_question_length:
                continue
            filtered.append(rec)
        return filtered

    def _split(self, records: list[dict]) -> tuple[list[dict], list[dict]]:
        """Train/val split (shuffle, then split by ratio)."""
        shuffled = list(records)
        random.seed(42)
        random.shuffle(shuffled)
        split_idx = int(len(shuffled) * self.config.train_ratio)
        return shuffled[:split_idx], shuffled[split_idx:]

    def _save_jsonl(self, records: list[dict], filepath: Path) -> None:
        """Save in the standard instruction-tuning JSONL format."""
        filepath = Path(filepath)
        with open(filepath, "w", encoding="utf-8") as f:
            for rec in records:
                line = {
                    "instruction": self.INSTRUCTION_TEXT,
                    "input": rec["question"],
                    "output": rec["answer"],
                    "source": rec["source"],
                    "category": rec.get("category", ""),
                }
                f.write(json.dumps(line, ensure_ascii=False) + "\n")
        logger.info("저장 완료: %s (%d 레코드)", filepath, len(records))
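The deduplication and split steps can be checked in isolation on in-memory records; the sketch below calls the private helpers directly, which is fine for a demo but not a public API.

from src.data_collection_preprocessing.pipeline import CivilResponseDataPipeline

pipe = CivilResponseDataPipeline()  # default DataConfig: train_ratio=0.9
recs = [{"question": f"q{i}", "answer": "a" * 40} for i in range(10)]
recs.append({"question": "q0", "answer": "a" * 40})  # exact duplicate of recs[0]

unique = pipe._deduplicate(recs)  # MD5 over question+answer drops the duplicate
train, val = pipe._split(unique)  # seed is fixed to 42, so the split is reproducible
print(len(unique), len(train), len(val))  # 10 9 1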
src/inference/.gitkeep
ADDED
File without changes
src/inference/__init__.py
ADDED
File without changes
src/inference/actions/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .base import ActionResult, BaseAction, Citation
from .data_go_kr import MinwonAnalysisAction

__all__ = ["ActionResult", "BaseAction", "Citation", "MinwonAnalysisAction"]
src/inference/actions/base.py
ADDED
@@ -0,0 +1,226 @@
"""Abstract base class module for Actions.

Defines the common interface and result types for Actions that call
external APIs or services from the AgentLoop.

Issue: #394
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from loguru import logger

from ..session_context import SessionContext


@dataclass
class Citation:
    """Source citation.

    Represents the provenance of an API response or search result.

    Parameters
    ----------
    title : str
        Citation title.
    url : str
        Citation URL. Empty string when unavailable.
    date : str
        Creation/registration date. Empty string when unavailable.
    snippet : str
        Body excerpt. Empty string when unavailable.
    metadata : Dict[str, Any]
        Additional metadata.
    """

    title: str
    url: str = ""
    date: str = ""
    snippet: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "title": self.title,
            "url": self.url,
            "date": self.date,
            "snippet": self.snippet,
            "metadata": self.metadata,
        }


@dataclass
class ActionResult:
    """Result of an Action run.

    Returned by BaseAction.execute(); compatible with the AgentLoop
    ToolFunction return shape (dict).

    Parameters
    ----------
    success : bool
        Whether the run succeeded.
    data : Dict[str, Any]
        Payload on success.
    error : Optional[str]
        Error message on failure.
    source : str
        Result source identifier (e.g. "data.go.kr").
    citations : List[Citation]
        Referenced sources.
    context_text : str
        Context text to inject into the LLM prompt.
    """

    success: bool
    data: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
    source: str = ""
    citations: List[Citation] = field(default_factory=list)
    context_text: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Convert to the AgentLoop ToolFunction return shape (dict)."""
        return {
            "success": self.success,
            "data": self.data,
            "error": self.error,
            "source": self.source,
            "citations": [c.to_dict() for c in self.citations],
            "context_text": self.context_text,
        }


class BaseAction(ABC):
    """Abstract base class for Actions.

    Provides an async callable compatible with the AgentLoop ToolFunction
    signature (query, context, session) -> dict.

    Parameters
    ----------
    action_name : str
        Action identifier; used for logging.
    """

    def __init__(self, action_name: str) -> None:
        self._action_name = action_name

    async def __call__(
        self,
        query: str,
        context: Dict[str, Any],
        session: SessionContext,
    ) -> Dict[str, Any]:
        """Entry point compatible with the AgentLoop ToolFunction signature.

        1. Pre-validate with validate().
        2. Run execute() when validation passes.
        3. Convert the ActionResult to a dict and return it.
        4. Log success/failure.

        Parameters
        ----------
        query : str
            User request text.
        context : Dict[str, Any]
            AgentLoop working context (including previous tool results).
        session : SessionContext
            Current session context.

        Returns
        -------
        Dict[str, Any]
            ActionResult.to_dict() result.
        """
        # 1. Pre-validation
        validation_error = self.validate(query, context, session)
        if validation_error:
            logger.warning(f"[{self._action_name}] 검증 실패: {validation_error}")
            result = ActionResult(
                success=False,
                error=validation_error,
                source=self._action_name,
            )
            return result.to_dict()

        # 2. Execution
        try:
            result = await self.execute(query, context, session)
        except Exception as exc:
            # loguru has no exc_info kwarg; logger.exception() records the traceback
            logger.exception(f"[{self._action_name}] execute() 예외 발생: {exc}")
            result = ActionResult(
                success=False,
                error=f"Action 실행 중 오류: {exc}",
                source=self._action_name,
            )

        # 3. Logging
        if result.success:
            logger.info(
                f"[{self._action_name}] 성공 "
                f"citations={len(result.citations)} "
                f"context_text_len={len(result.context_text)}"
            )
        else:
            logger.warning(f"[{self._action_name}] 실패: {result.error}")

        return result.to_dict()

    def validate(
        self,
        query: str,
        context: Dict[str, Any],
        session: SessionContext,
    ) -> Optional[str]:
        """Pre-validation before execution; returns an error message on failure.

        Default implementation rejects empty queries. Subclasses may call
        super() and add further checks.

        Parameters
        ----------
        query : str
            User request text.
        context : Dict[str, Any]
            AgentLoop working context.
        session : SessionContext
            Current session context.

        Returns
        -------
        Optional[str]
            Validation failure message; None when validation passes.
        """
        if not query or not query.strip():
            return "쿼리가 비어 있습니다."
        return None

    @abstractmethod
    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: SessionContext,
    ) -> ActionResult:
        """Actual Action logic; implemented by subclasses.

        Parameters
        ----------
        query : str
            User request text.
        context : Dict[str, Any]
            AgentLoop working context.
        session : SessionContext
            Current session context.

        Returns
        -------
        ActionResult
            Run result.
        """
        ...
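A minimal concrete Action showing the validate/execute contract end to end. EchoAction is illustrative and not part of the codebase; session=None stands in for a real SessionContext since the base validate() only inspects the query, and the import assumes the repo root is on sys.path.

import asyncio

from src.inference.actions.base import ActionResult, BaseAction, Citation


class EchoAction(BaseAction):
    def __init__(self) -> None:
        super().__init__(action_name="echo")

    async def execute(self, query, context, session) -> ActionResult:
        return ActionResult(
            success=True,
            data={"echo": query},
            source="echo",
            citations=[Citation(title="local echo")],
            context_text=f"echo: {query}",
        )


result = asyncio.run(EchoAction()(query="hello", context={}, session=None))
print(result["success"], result["data"])  # True {'echo': 'hello'}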
src/inference/actions/data_go_kr.py
ADDED
|
@@ -0,0 +1,952 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""data.go.kr ๋ฏผ์ ๋ถ์ API Action ๋ชจ๋.
|
| 2 |
+
|
| 3 |
+
๊ณต๊ณต๋ฐ์ดํฐํฌํธ(data.go.kr)์ ๋ฏผ์๋ถ์์ ๋ณด์กฐํ API๋ฅผ ํธ์ถํ์ฌ
|
| 4 |
+
์ ์ฌ ๋ฏผ์ ์ฌ๋ก๋ฅผ ๊ฒ์ํ๊ณ LLM ์ปจํ
์คํธ๋ก ๋ณํํ๋ค.
|
| 5 |
+
|
| 6 |
+
API ๋ฌธ์: https://www.data.go.kr/data/15025759/openapi.do
|
| 7 |
+
Issue: #394
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
from typing import Any, Dict, List, Optional
|
| 12 |
+
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from ..session_context import SessionContext
|
| 16 |
+
from .base import ActionResult, BaseAction, Citation
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
import httpx
|
| 20 |
+
|
| 21 |
+
_HTTPX_AVAILABLE = True
|
| 22 |
+
_HttpxTimeoutError = httpx.TimeoutException
|
| 23 |
+
_HttpxStatusError = httpx.HTTPStatusError
|
| 24 |
+
except ImportError:
|
| 25 |
+
httpx = None # type: ignore
|
| 26 |
+
_HTTPX_AVAILABLE = False
|
| 27 |
+
_HttpxTimeoutError = type(None) # ์ ๋ ๋งค์น๋์ง ์๋ ํ์
|
| 28 |
+
_HttpxStatusError = type(None)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
# ์์
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
|
| 35 |
+
_BASE_URL = "http://apis.data.go.kr/1140100/minAnalsInfoView5"
|
| 36 |
+
_ENDPOINT_SIMILAR = "/minSimilarInfo5"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class MinwonAnalysisAction(BaseAction):
|
| 40 |
+
"""๊ณต๊ณต๋ฐ์ดํฐํฌํธ ๋ฏผ์๋ถ์์ ๋ณด์กฐํ API Action.
|
| 41 |
+
|
| 42 |
+
data.go.kr์ ๋ฏผ์๋ถ์์ ๋ณด์กฐํ API๋ฅผ ํธ์ถํ์ฌ
|
| 43 |
+
์ ์ฌ ๋ฏผ์ ์ฌ๋ก๋ฅผ ๊ฐ์ ธ์ค๊ณ AgentLoop ์ปจํ
์คํธ์ ์ ๊ณตํ๋ค.
|
| 44 |
+
|
| 45 |
+
Parameters
|
| 46 |
+
----------
|
| 47 |
+
api_key : Optional[str]
|
| 48 |
+
๊ณต๊ณต๋ฐ์ดํฐํฌํธ API ์ธ์ฆํค. None์ด๋ฉด DATA_GO_KR_API_KEY ํ๊ฒฝ๋ณ์์์ ๋ก๋.
|
| 49 |
+
ret_count : int
|
| 50 |
+
๋ฐํํ ์ ์ฌ ์ฌ๋ก ์. ๊ธฐ๋ณธ๊ฐ 5.
|
| 51 |
+
min_score : int
|
| 52 |
+
์ต์ ์ ์ฌ๋ ์ ์. ๊ธฐ๋ณธ๊ฐ 2.
|
| 53 |
+
timeout : float
|
| 54 |
+
HTTP ์์ฒญ ํ์์์(์ด). ๊ธฐ๋ณธ๊ฐ 10.0.
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
+
def __init__(
|
| 58 |
+
self,
|
| 59 |
+
api_key: Optional[str] = None,
|
| 60 |
+
ret_count: int = 5,
|
| 61 |
+
min_score: int = 2,
|
| 62 |
+
timeout: float = 10.0,
|
| 63 |
+
) -> None:
|
| 64 |
+
super().__init__(action_name="minwon_analysis")
|
| 65 |
+
self._api_key = api_key or os.getenv("DATA_GO_KR_API_KEY", "")
|
| 66 |
+
self._ret_count = ret_count
|
| 67 |
+
self._min_score = min_score
|
| 68 |
+
self._timeout = timeout
|
| 69 |
+
|
| 70 |
+
def validate(
|
| 71 |
+
self,
|
| 72 |
+
query: str,
|
| 73 |
+
context: Dict[str, Any],
|
| 74 |
+
session: SessionContext,
|
| 75 |
+
) -> Optional[str]:
|
| 76 |
+
"""API ํค์ ์ฟผ๋ฆฌ ๊ธธ์ด๋ฅผ ๊ฒ์ฆํ๋ค.
|
| 77 |
+
|
| 78 |
+
Parameters
|
| 79 |
+
----------
|
| 80 |
+
query : str
|
| 81 |
+
์ฌ์ฉ์ ์์ฒญ ํ
์คํธ.
|
| 82 |
+
context : Dict[str, Any]
|
| 83 |
+
AgentLoop ๋์ ์ปจํ
์คํธ.
|
| 84 |
+
session : SessionContext
|
| 85 |
+
ํ์ฌ ์ธ์
์ปจํ
์คํธ.
|
| 86 |
+
|
| 87 |
+
Returns
|
| 88 |
+
-------
|
| 89 |
+
Optional[str]
|
| 90 |
+
๊ฒ์ฆ ์คํจ ๋ฉ์์ง. None์ด๋ฉด ํต๊ณผ.
|
| 91 |
+
"""
|
| 92 |
+
base_error = super().validate(query, context, session)
|
| 93 |
+
if base_error:
|
| 94 |
+
return base_error
|
| 95 |
+
|
| 96 |
+
if not self._api_key:
|
| 97 |
+
return "DATA_GO_KR_API_KEY ํ๊ฒฝ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค."
|
| 98 |
+
|
| 99 |
+
if len(query.strip()) < 2:
|
| 100 |
+
return "์ฟผ๋ฆฌ๊ฐ ๋๋ฌด ์งง์ต๋๋ค (์ต์ 2์ ์ด์)."
|
| 101 |
+
|
| 102 |
+
if not _HTTPX_AVAILABLE:
|
| 103 |
+
return "httpx ํจํค์ง๊ฐ ์ค์น๋์ง ์์์ต๋๋ค. pip install httpx>=0.27.0"
|
| 104 |
+
|
| 105 |
+
return None
|
| 106 |
+
|
| 107 |
+
async def execute(
|
| 108 |
+
self,
|
| 109 |
+
query: str,
|
| 110 |
+
context: Dict[str, Any],
|
| 111 |
+
session: SessionContext,
|
| 112 |
+
) -> ActionResult:
|
| 113 |
+
"""์ ์ฌ ๋ฏผ์ ์ฌ๋ก๋ฅผ ์กฐํํ๊ณ ActionResult๋ก ๋ฐํํ๋ค.
|
| 114 |
+
|
| 115 |
+
1. _enrich_query๋ก ๋ถ๋ฅ ์นดํ
๊ณ ๋ฆฌ๋ฅผ ๋ฐ์ํ ๊ฒ์์ด ์์ฑ.
|
| 116 |
+
2. _call_similar_api๋ก API ํธ์ถ.
|
| 117 |
+
3. ๊ฒฐ๊ณผ๋ฅผ ํ์ฑํ์ฌ ActionResult ์์ฑ.
|
| 118 |
+
|
| 119 |
+
Parameters
|
| 120 |
+
----------
|
| 121 |
+
query : str
|
| 122 |
+
์ฌ์ฉ์ ์์ฒญ ํ
์คํธ.
|
| 123 |
+
context : Dict[str, Any]
|
| 124 |
+
AgentLoop ๋์ ์ปจํ
์คํธ.
|
| 125 |
+
session : SessionContext
|
| 126 |
+
ํ์ฌ ์ธ์
์ปจํ
์คํธ.
|
| 127 |
+
|
| 128 |
+
Returns
|
| 129 |
+
-------
|
| 130 |
+
ActionResult
|
| 131 |
+
์ ์ฌ ๋ฏผ์ ์ฌ๋ก์ LLM ์ปจํ
์คํธ๊ฐ ํฌํจ๋ ๊ฒฐ๊ณผ.
|
| 132 |
+
"""
|
| 133 |
+
payload = await self.fetch_similar_cases(query, context)
|
| 134 |
+
items = payload["results"]
|
| 135 |
+
|
| 136 |
+
if items is None:
|
| 137 |
+
return ActionResult(
|
| 138 |
+
success=False,
|
| 139 |
+
error="๋ฏผ์ ๋ถ์ API ํธ์ถ์ ์คํจํ์ต๋๋ค.",
|
| 140 |
+
source="data.go.kr",
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
if not items:
|
| 144 |
+
return ActionResult(
|
| 145 |
+
success=True,
|
| 146 |
+
data={"results": [], "query": payload["query"], "count": 0},
|
| 147 |
+
source="data.go.kr",
|
| 148 |
+
context_text="",
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
return ActionResult(
|
| 152 |
+
success=True,
|
| 153 |
+
data={
|
| 154 |
+
"results": items,
|
| 155 |
+
"query": payload["query"],
|
| 156 |
+
"count": len(items),
|
| 157 |
+
},
|
| 158 |
+
source="data.go.kr",
|
| 159 |
+
citations=payload["citations"],
|
| 160 |
+
context_text=payload["context_text"],
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
async def fetch_similar_cases(
|
| 164 |
+
self,
|
| 165 |
+
query: str,
|
| 166 |
+
context: Dict[str, Any],
|
| 167 |
+
ret_count: Optional[int] = None,
|
| 168 |
+
min_score: Optional[int] = None,
|
| 169 |
+
) -> Dict[str, Any]:
|
| 170 |
+
"""์ ์ฌ ๋ฏผ์ ์ฌ๋ก ๊ฒ์์ ํ์ํ payload๋ฅผ ๊ตฌ์ฑํ๋ค.
|
| 171 |
+
|
| 172 |
+
api_lookup capability ๋ด๋ถ์์ minSimilarInfo5 ํธ์ถ ๊ฒฝ๋ก๋ฅผ
|
| 173 |
+
๊ณต์ฉ์ผ๋ก ์ฌ์ฌ์ฉํ ์ ์๋๋ก ๊ณต๊ฐ helper๋ก ์ ๊ณตํ๋ค.
|
| 174 |
+
|
| 175 |
+
        Parameters
        ----------
        ret_count : Optional[int]
            Override for the number of results to return.
        min_score : Optional[int]
            Override for the minimum similarity score.
        """
        search_query = self._enrich_query(query, context)
        logger.debug(f"[minwon_analysis] enriched search term: {search_query!r}")
        items = await self._call_similar_api(search_query, ret_count=ret_count, min_score=min_score)

        return {
            "query": search_query,
            "results": items,
            "count": len(items or []),
            "context_text": self._build_context_text(items or [], query) if items else "",
            "citations": self._build_citations(items or []),
        }

    async def _call_similar_api(
        self,
        search_query: str,
        ret_count: Optional[int] = None,
        min_score: Optional[int] = None,
    ) -> Optional[List[Dict[str, Any]]]:
        """Call the public data portal similar-complaint API.

        Parameters
        ----------
        search_query : str
            Search term passed to the API.
        ret_count : Optional[int]
            Override for the number of results; instance default when None.
        min_score : Optional[int]
            Override for the minimum similarity score; instance default when None.

        Returns
        -------
        Optional[List[Dict[str, Any]]]
            List of items on success, None on failure.
        """
        url = _BASE_URL + _ENDPOINT_SIMILAR
        params = {
            "serviceKey": self._api_key,
            "startPos": 1,
            "retCount": ret_count if ret_count is not None else self._ret_count,
            "target": "qna,qna_origin",
            "minScore": min_score if min_score is not None else self._min_score,
            "dataType": "json",
            "searchword": search_query,
        }

        try:
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                response = await client.get(url, params=params)
                response.raise_for_status()
                body = response.json()
        except _HttpxTimeoutError as exc:
            logger.warning(f"[minwon_analysis] API timeout: {exc}")
            return None
        except _HttpxStatusError as exc:
            logger.warning(f"[minwon_analysis] HTTP error {exc.response.status_code}: {exc}")
            return None
        except Exception as exc:
            logger.error(f"[minwon_analysis] API call error: {exc}", exc_info=True)
            return None

        # The live API responds with either a top-level array ([]) or a returnObject wrapper
        if isinstance(body, list):
            return body

        if not isinstance(body, dict):
            logger.warning(f"[minwon_analysis] unexpected response type: {type(body)}")
            return None

        # returnObject wrapper
        if "returnObject" in body:
            obj = body["returnObject"]
            return obj if isinstance(obj, list) else []

        # Check for an error envelope -- only success codes pass
        _SUCCESS_CODES = {"00", "0", "200", ""}
        code = str(body.get("code", body.get("resultCode", "00")))
        if code not in _SUCCESS_CODES:
            logger.warning(
                f"[minwon_analysis] API error (code={code}): {body.get('msg', body.get('resultMsg', ''))}"
            )
            return None

        return self._parse_similar_items(body)

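    # For reference, the three response shapes the branches above accept. These
    # are sketches inferred from this handler, not taken from official API docs:
    #
    #   1) top-level array:       [{"title": "...", ...}, ...]
    #   2) returnObject wrapper:  {"returnObject": [{"title": "...", ...}, ...]}
    #   3) nested envelope:       {"body": {"items": {"item": [...] or {...}}}}
    #      (unwrapped by _parse_similar_items below)
    #
    # Any other shape, or a non-success "code"/"resultCode", yields None.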
    def _parse_similar_items(self, raw_body: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract the item list from an API response.

        Handles both the array form and the single-dict wrapper form.

        Parameters
        ----------
        raw_body : Dict[str, Any]
            Full response JSON from the API.

        Returns
        -------
        List[Dict[str, Any]]
            Parsed item list.
        """
        # Top-level key traversal: body -> items -> item, or items directly
        body = raw_body.get("body") or raw_body.get("response", {}).get("body") or raw_body
        items_raw = body.get("items") if isinstance(body, dict) else None

        if items_raw is None:
            logger.debug("[minwon_analysis] no 'items' key in response -> returning empty result")
            return []

        # Handle array vs dict wrapper
        if isinstance(items_raw, list):
            return items_raw
        if isinstance(items_raw, dict):
            item = items_raw.get("item")
            if item is None:
                return []
            if isinstance(item, list):
                return item
            if isinstance(item, dict):
                return [item]

        logger.warning(f"[minwon_analysis] unexpected items format: {type(items_raw)}")
        return []

    def _build_context_text(self, items: List[Dict[str, Any]], query: str) -> str:
        """Convert an item list into context text for an LLM prompt.

        Parameters
        ----------
        items : List[Dict[str, Any]]
            Items returned by the API.
        query : str
            Original user query.

        Returns
        -------
        str
            Text to insert into the LLM prompt.
        """
        if not items:
            return ""

        lines = [f"### Similar complaint cases from the public data portal (query: {query})\n"]
        for i, item in enumerate(items[:5], 1):
            title = item.get("title") or item.get("qnaTitle") or ""
            content = item.get("content") or item.get("qnaContent") or item.get("question") or ""
            answer = item.get("answer") or item.get("qnaAnswer") or ""
            category = (
                item.get("category") or item.get("minCategory") or item.get("main_sub_name") or ""
            )
            date = item.get("regDate") or item.get("date") or item.get("create_date") or ""

            lines.append(f"{i}. [{category}] {title}")
            if date:
                lines.append(f" (registered: {date})")
            if content:
                preview = content[:200] + "..." if len(content) > 200 else content
                lines.append(f" complaint: {preview}")
            if answer:
                ans_preview = answer[:200] + "..." if len(answer) > 200 else answer
                lines.append(f" answer: {ans_preview}")
            lines.append("")

        return "\n".join(lines)

    def _build_citations(self, items: List[Dict[str, Any]]) -> List[Citation]:
        """Build a list of Citation objects from an item list.

        Parameters
        ----------
        items : List[Dict[str, Any]]
            Items returned by the API.

        Returns
        -------
        List[Citation]
            List of Citation objects.
        """
        citations = []
        for item in items:
            title = item.get("title") or item.get("qnaTitle") or ""
            url = item.get("url") or item.get("detailUrl") or ""
            date = item.get("regDate") or item.get("date") or item.get("create_date") or ""
            content = item.get("content") or item.get("qnaContent") or item.get("question") or ""
            snippet = content[:150] + "..." if len(content) > 150 else content

            # Skip items without a title
            if not title:
                continue

            citations.append(
                Citation(
                    title=title,
                    url=url,
                    date=date,
                    snippet=snippet,
                    metadata={k: v for k, v in item.items() if k not in ("content", "answer")},
                )
            )
        return citations

    def _enrich_query(self, query: str, context: Dict[str, Any]) -> str:
        """Enrich the search term with the session summary or the latest assistant reply.

        Parameters
        ----------
        query : str
            Original user query.
        context : Dict[str, Any]
            Accumulated AgentLoop context.

        Returns
        -------
        str
            Enriched search term.
        """
        query_variants = context.get("query_variants", {})
        if isinstance(query_variants, dict):
            prepared_query = str(query_variants.get("api_lookup", "")).strip()
            if prepared_query:
                return prepared_query

        session_context = str(context.get("session_context", "")).strip()
        if session_context:
            recent_summary = " ".join(session_context.splitlines()[-2:]).strip()
            if recent_summary and recent_summary not in query:
                return f"{query} {recent_summary[:120]}".strip()
        return query

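    # Illustrative precedence for _enrich_query (the example values are hypothetical):
    #
    #   context = {"query_variants": {"api_lookup": "parking fine appeal"}}
    #       -> "parking fine appeal"        # a prepared variant always wins
    #
    #   context = {"session_context": "...\nasked about towing\nrequested a refund"}
    #       -> "<query> asked about towing requested a refund"
    #       # otherwise the last two summary lines (truncated to 120 chars) are appended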
    # ---------------------------------------------------------------------------
    # Shared API call helper
    # ---------------------------------------------------------------------------

    async def _call_api(
        self, endpoint: str, params: Dict[str, Any]
    ) -> Optional[List[Dict[str, Any]]]:
        """Shared API call plus response parsing.

        Parameters
        ----------
        endpoint : str
            Endpoint path appended to _BASE_URL.
        params : Dict[str, Any]
            Query parameters (serviceKey and dataType are added automatically).

        Returns
        -------
        Optional[List[Dict[str, Any]]]
            List of items on success, None on failure.
        """
        if not _HTTPX_AVAILABLE:
            logger.warning("[minwon_analysis] httpx is not installed")
            return None

        url = _BASE_URL + endpoint
        params["serviceKey"] = self._api_key
        params["dataType"] = "json"

        try:
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                response = await client.get(url, params=params)
                response.raise_for_status()
                body = response.json()
        except httpx.TimeoutException as exc:
            logger.warning(f"[minwon_analysis] API timeout ({endpoint}): {exc}")
            return None
        except httpx.HTTPStatusError as exc:
            logger.warning(
                f"[minwon_analysis] HTTP error ({endpoint}) {exc.response.status_code}: {exc}"
            )
            return None
        except Exception as exc:
            logger.error(
                f"[minwon_analysis] API call error ({endpoint}): {exc}",
                exc_info=True,
            )
            return None

        # Top-level array
        if isinstance(body, list):
            return body

        # dict wrapper
        if isinstance(body, dict):
            if "returnObject" in body:
                obj = body["returnObject"]
                return obj if isinstance(obj, list) else []
            # Error-code whitelist (same as _call_similar_api above)
            code = str(body.get("code", body.get("resultCode", "00")))
            if code not in ("00", "0", "200", ""):
                logger.warning(
                    f"[minwon_analysis] API error ({endpoint}): code={code}, "
                    f"msg={body.get('msg', body.get('resultMsg', ''))}"
                )
                return None
            # Try parsing the body > items path
            return self._parse_similar_items(body)

        return None

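    # Note: _call_api mutates the caller's params dict in place (serviceKey and
    # dataType are injected), so the wrappers below always pass freshly built
    # dicts. A minimal call sketch, assuming a configured instance `action`:
    #
    #   items = await action._call_api("/minRisingKeyword5", {"maxResult": 10})
    #   if items is None:
    #       ...  # transport or API-level failure was already logged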
    # ---------------------------------------------------------------------------
    # Issue detection APIs (issue_detector)
    # ---------------------------------------------------------------------------

    async def get_rising_keywords(
        self,
        analysis_time: str,
        max_result: int = 10,
        target: str = "pttn,dfpt,saeol",
        main_sub_code: str = "1140100",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch rapidly rising keywords.

        Parameters
        ----------
        analysis_time : str
            Analysis time point (e.g. "2021050614").
        max_result : int
            Maximum number of results.
        target : str
            Target channels.
        main_sub_code : str
            Organization code.
        """
        return await self._call_api(
            "/minRisingKeyword5",
            {
                "analysisTime": analysis_time,
                "maxResult": max_result,
                "target": target,
                "mainSubCode": main_sub_code,
            },
        )

    async def get_today_topics(
        self,
        search_date: str,
        top_n: int = 5,
        target: str = "pttn,dfpt,saeol",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch today's issue topics.

        Parameters
        ----------
        search_date : str
            Search date (e.g. "20210506").
        top_n : int
            Top N entries.
        target : str
            Target channels.
        """
        return await self._call_api(
            "/minTodayTopicInfo5",
            {
                "searchDate": search_date,
                "todayTopicTopN": top_n,
                "target": target,
            },
        )

    async def get_top_keywords_by_period(
        self,
        analysis_time: str,
        period: str = "MONTHLY",
        range_count: int = 1,
        max_result: int = 5,
        target: str = "pttn,dfpt,saeol",
        main_sub_code: str = "1140100",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the most frequent keywords by period.

        Parameters
        ----------
        analysis_time : str
            Analysis start point (e.g. "20210301").
        period : str
            Period unit ("DAILY" | "WEEKLY" | "MONTHLY").
        range_count : int
            Number of period ranges.
        max_result : int
            Maximum number of results.
        target : str
            Target channels.
        main_sub_code : str
            Organization code.
        """
        return await self._call_api(
            "/minDFTopNKeyword5",
            {
                "target": target,
                "period": period,
                "analysisTime": analysis_time,
                "rangeCount": range_count,
                "maxResult": max_result,
                "mainSubCode": main_sub_code,
            },
        )

    # ---------------------------------------------------------------------------
    # Statistics APIs (stats_lookup)
    # ---------------------------------------------------------------------------

    async def get_statistics(
        self,
        date_from: str,
        date_to: str,
        period: str = "DAILY",
        target: str = "pttn,dfpt,saeol",
        sort_by: str = "NAME",
        sort_order: str = "false",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch customized statistics.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        period : str
            Period unit.
        target : str
            Target channels.
        sort_by : str
            Sort key.
        sort_order : str
            Sort order ("true" ascending, "false" descending).
        """
        return await self._call_api(
            "/minStaticsInfo5",
            {
                "target": target,
                "dateFrom": date_from,
                "dateTo": date_to,
                "period": period,
                "sortBy": sort_by,
                "sortOrder": sort_order,
            },
        )

    async def get_trend(
        self,
        date_from: str,
        date_to: str,
        period: str = "DAILY",
        target: str = "pttn,dfpt,saeol",
        sort_by: str = "NAME",
        sort_order: str = "false",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the complaint trend (time series).

        Parameters
        ----------
        date_from : str
            Start datetime (YYYYMMDDHH).
        date_to : str
            End datetime (YYYYMMDDHH).
        period : str
            Period unit.
        target : str
            Target channels.
        sort_by : str
            Sort key.
        sort_order : str
            Sort order.
        """
        return await self._call_api(
            "/minTimeSeriseView5",
            {
                "target": target,
                "dateFrom": date_from,
                "dateTo": date_to,
                "period": period,
                "sortBy": sort_by,
                "sortOrder": sort_order,
            },
        )

    async def get_doc_count(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        target: str = "pttn,dfpt,saeol",
        min_score: int = 70,
        omit_duplicate: bool = False,
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the number of complaint documents.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        searchword : str
            Search term.
        target : str
            Target channels.
        min_score : int
            Minimum similarity score.
        omit_duplicate : bool
            Whether to drop duplicates.
        """
        return await self._call_api(
            "/minSearchDocCnt5",
            {
                "dateFrom": date_from,
                "dateTo": date_to,
                "target": target,
                "minScore": min_score,
                "searchword": searchword,
                "omitDuplicate": str(omit_duplicate).lower(),
            },
        )

    async def get_org_ranking(
        self,
        date_from: str,
        date_to: str,
        top_n: int = 5,
        target: str = "pttn,dfpt,saeol",
        sort_by: str = "VALUE",
        sort_order: str = "false",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the complaint ranking by organization.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        top_n : int
            Top N entries.
        target : str
            Target channels.
        sort_by : str
            Sort key.
        sort_order : str
            Sort order.
        """
        return await self._call_api(
            "/minMofacetInfo5",
            {
                "topN": top_n,
                "sortBy": sort_by,
                "sortOrder": sort_order,
                "target": target,
                "dateFrom": date_from,
                "dateTo": date_to,
            },
        )

    async def get_region_ranking(
        self,
        date_from: str,
        date_to: str,
        top_n: int = 5,
        target: str = "pttn,dfpt,saeol",
        sort_by: str = "VALUE",
        sort_order: str = "false",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the complaint ranking by region.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        top_n : int
            Top N entries.
        target : str
            Target channels.
        sort_by : str
            Sort key.
        sort_order : str
            Sort order.
        """
        return await self._call_api(
            "/minMrfacetInfo5",
            {
                "topN": top_n,
                "sortBy": sort_by,
                "sortOrder": sort_order,
                "dateFrom": date_from,
                "dateTo": date_to,
                "target": target,
            },
        )

    # ---------------------------------------------------------------------------
    # Keyword analysis APIs (keyword_analyzer)
    # ---------------------------------------------------------------------------

    async def get_core_keywords(
        self,
        date_from: str,
        date_to: str,
        result_count: int = 5,
        target: str = "pttn,dfpt,saeol",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch core keywords.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        result_count : int
            Number of results.
        target : str
            Target channels.
        """
        return await self._call_api(
            "/minTopNKeyword5",
            {
                "target": target,
                "dateFrom": date_from,
                "dateTo": date_to,
                "resultCount": result_count,
            },
        )

    async def get_related_words(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        result_count: int = 5,
        target: str = "pttn,dfpt,saeol",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch related words.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        searchword : str
            Search term.
        result_count : int
            Number of results.
        target : str
            Target channels.
        """
        return await self._call_api(
            "/minWdcloudInfo5",
            {
                "target": target,
                "searchword": searchword,
                "dateFrom": date_from,
                "dateTo": date_to,
                "resultCount": result_count,
            },
        )

    # ---------------------------------------------------------------------------
    # Demographics APIs (demographics_lookup)
    # ---------------------------------------------------------------------------

    async def get_gender_stats(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        target: str = "pttn",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch statistics by gender.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        searchword : str
            Search term.
        target : str
            Target channels.
        """
        return await self._call_api(
            "/minPttnStstGndrInfo5",
            {
                "dateFrom": date_from,
                "dateTo": date_to,
                "target": target,
                "searchword": searchword,
            },
        )

    async def get_age_stats(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        target: str = "pttn",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch statistics by age group.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        searchword : str
            Search term.
        target : str
            Target channels.
        """
        return await self._call_api(
            "/minPttnStstAgeInfo5",
            {
                "dateFrom": date_from,
                "dateTo": date_to,
                "target": target,
                "searchword": searchword,
            },
        )

    async def get_population_ratio(
        self,
        date_from: str,
        date_to: str,
        top_n: int = 5,
        target: str = "pttn,saeol,dfpt",
        period: str = "DAILY",
        sort_by: str = "VALUE",
        sort_order: str = "false",
        date_type: str = "C",
        search_type: str = "REGION",
    ) -> Optional[List[Dict[str, Any]]]:
        """Fetch the complaint-to-population ratio.

        Parameters
        ----------
        date_from : str
            Start date (YYYYMMDD).
        date_to : str
            End date (YYYYMMDD).
        top_n : int
            Top N entries.
        target : str
            Target channels.
        period : str
            Period unit.
        sort_by : str
            Sort key.
        sort_order : str
            Sort order.
        date_type : str
            Date type ("C" receipt date, "R" registration date).
        search_type : str
            Search type ("REGION" for by-region).
        """
        return await self._call_api(
            "/minMrPopltnRtInfo5",
            {
                "target": target,
                "dateFrom": date_from,
                "dateTo": date_to,
                "dateType": date_type,
                "topN": top_n,
                "period": period,
                "sortBy": sort_by,
                "sortOrder": sort_order,
                "searchType": search_type,
            },
        )
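A minimal usage sketch for the action class above; `action` stands in for an instance constructed elsewhere with a service key (the constructor is not part of this hunk), and the dates are placeholders:

    import asyncio

    async def demo(action) -> None:
        # Org-level ranking and rising keywords, both parsed by _call_api above.
        ranking = await action.get_org_ranking("20240101", "20240131", top_n=5)
        keywords = await action.get_rising_keywords("2024010614", max_result=10)
        print(ranking, keywords)

    # asyncio.run(demo(action))  # supply a real instance before running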
src/inference/agent_loop.py
ADDED
@@ -0,0 +1,360 @@
"""Session-based task loop."""

from __future__ import annotations

import asyncio
import time
import uuid
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator, Callable, Dict, List, Optional

from loguru import logger

from .query_builder import build_query_variants, build_runtime_query_context, resolve_tool_query
from .session_context import SessionContext
from .tool_router import ExecutionPlan, ToolName, ToolRouter, ToolStep, ToolType, tool_name


@dataclass
class ToolResult:
    tool: ToolName
    success: bool
    data: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
    latency_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        return {
            "tool": tool_name(self.tool),
            "success": self.success,
            "data": self.data,
            "error": self.error,
            "latency_ms": round(self.latency_ms, 2),
        }


@dataclass
class AgentTrace:
    request_id: str
    session_id: str
    plan: Optional[ExecutionPlan] = None
    tool_results: List[ToolResult] = field(default_factory=list)
    total_latency_ms: float = 0.0
    final_text: str = ""
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        return {
            "request_id": self.request_id,
            "session_id": self.session_id,
            "plan": self.plan.tool_names if self.plan else [],
            "plan_reason": self.plan.reason if self.plan else "",
            "tool_results": [result.to_dict() for result in self.tool_results],
            "total_latency_ms": round(self.total_latency_ms, 2),
            "error": self.error,
        }

|
| 59 |
+
DEFAULT_TOOL_TIMEOUT = 30.0
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class AgentLoop:
|
| 63 |
+
"""GovOn MVP capability loop."""
|
| 64 |
+
|
| 65 |
+
def __init__(
|
| 66 |
+
self,
|
| 67 |
+
tool_registry: Dict[ToolName, ToolFunction],
|
| 68 |
+
router: Optional[ToolRouter] = None,
|
| 69 |
+
tool_timeout: float = DEFAULT_TOOL_TIMEOUT,
|
| 70 |
+
) -> None:
|
| 71 |
+
self._tools = {tool_name(name): runner for name, runner in tool_registry.items()}
|
| 72 |
+
self._router = router or ToolRouter()
|
| 73 |
+
self._tool_timeout = tool_timeout
|
| 74 |
+
|
| 75 |
+
async def run(
|
| 76 |
+
self,
|
| 77 |
+
query: str,
|
| 78 |
+
session: SessionContext,
|
| 79 |
+
request_id: Optional[str] = None,
|
| 80 |
+
force_tools: Optional[List[ToolName]] = None,
|
| 81 |
+
) -> AgentTrace:
|
| 82 |
+
rid = request_id or str(uuid.uuid4())
|
| 83 |
+
trace = AgentTrace(request_id=rid, session_id=session.session_id)
|
| 84 |
+
loop_start = time.monotonic()
|
| 85 |
+
started_at = time.time()
|
| 86 |
+
|
| 87 |
+
try:
|
| 88 |
+
session.add_turn("user", query)
|
| 89 |
+
|
| 90 |
+
has_context = bool(session.tool_runs or session.conversations)
|
| 91 |
+
plan = self._router.plan(query, has_context=has_context, force_tools=force_tools)
|
| 92 |
+
trace.plan = plan
|
| 93 |
+
|
| 94 |
+
accumulated: Dict[str, Any] = build_runtime_query_context(session, query)
|
| 95 |
+
accumulated["conversation"] = [
|
| 96 |
+
{"role": turn.role, "content": turn.content} for turn in session.recent_history[-5:]
|
| 97 |
+
]
|
| 98 |
+
accumulated["query_variants"] = build_query_variants(
|
| 99 |
+
query,
|
| 100 |
+
tool_names=plan.tool_names,
|
| 101 |
+
context=accumulated,
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
+
for step in plan.steps:
|
| 105 |
+
result = await self._execute_tool(step, accumulated, session)
|
| 106 |
+
trace.tool_results.append(result)
|
| 107 |
+
accumulated[step.step_id] = result.data if result.success else {}
|
| 108 |
+
session.add_tool_run(
|
| 109 |
+
tool=step.step_id,
|
| 110 |
+
graph_run_request_id=rid,
|
| 111 |
+
success=result.success,
|
| 112 |
+
latency_ms=result.latency_ms,
|
| 113 |
+
error=result.error,
|
| 114 |
+
metadata=self._build_tool_log_metadata(result.data),
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
trace.final_text = self._extract_final_text(accumulated, plan)
|
| 118 |
+
session.add_turn("assistant", trace.final_text)
|
| 119 |
+
|
| 120 |
+
except Exception as exc:
|
| 121 |
+
trace.error = str(exc)
|
| 122 |
+
logger.error(f"[AgentLoop] request_id={rid} ์ค๋ฅ: {exc}", exc_info=True)
|
| 123 |
+
finally:
|
| 124 |
+
trace.total_latency_ms = (time.monotonic() - loop_start) * 1000
|
| 125 |
+
self._record_graph_run(
|
| 126 |
+
session=session,
|
| 127 |
+
trace=trace,
|
| 128 |
+
started_at=started_at,
|
| 129 |
+
completed_at=time.time(),
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
return trace
|
| 133 |
+
|
| 134 |
+
    async def run_stream(
        self,
        query: str,
        session: SessionContext,
        request_id: Optional[str] = None,
        force_tools: Optional[List[ToolName]] = None,
    ) -> AsyncGenerator[Dict[str, Any], None]:
        rid = request_id or str(uuid.uuid4())
        loop_start = time.monotonic()
        started_at = time.time()
        trace = AgentTrace(request_id=rid, session_id=session.session_id)

        try:
            session.add_turn("user", query)
            has_context = bool(session.tool_runs or session.conversations)
            plan = self._router.plan(query, has_context=has_context, force_tools=force_tools)
            trace.plan = plan

            yield {
                "type": "plan",
                "request_id": rid,
                "plan": plan.tool_names,
                "reason": plan.reason,
            }

            accumulated: Dict[str, Any] = build_runtime_query_context(session, query)
            accumulated["query_variants"] = build_query_variants(
                query,
                tool_names=plan.tool_names,
                context=accumulated,
            )

            for step in plan.steps:
                yield {"type": "tool_start", "request_id": rid, "tool": step.step_id}
                result = await self._execute_tool(step, accumulated, session)
                trace.tool_results.append(result)
                accumulated[step.step_id] = result.data if result.success else {}
                session.add_tool_run(
                    tool=step.step_id,
                    graph_run_request_id=rid,
                    success=result.success,
                    latency_ms=result.latency_ms,
                    error=result.error,
                    metadata=self._build_tool_log_metadata(result.data),
                )
                yield {
                    "type": "tool_result",
                    "request_id": rid,
                    "tool": step.step_id,
                    "success": result.success,
                    "latency_ms": round(result.latency_ms, 2),
                    "error": result.error,
                }

            trace.final_text = self._extract_final_text(accumulated, plan)
            session.add_turn("assistant", trace.final_text)
            trace.total_latency_ms = (time.monotonic() - loop_start) * 1000
            yield {
                "type": "final",
                "request_id": rid,
                "text": trace.final_text,
                "trace": trace.to_dict(),
                "finished": True,
            }

        except Exception as exc:
            trace.error = str(exc)
            trace.total_latency_ms = (time.monotonic() - loop_start) * 1000
            logger.error(f"[AgentLoop] stream request_id={rid} error: {exc}", exc_info=True)
            yield {
                "type": "error",
                "request_id": rid,
                "error": "An internal error occurred while processing the agent request.",
                "finished": True,
            }
        finally:
            if trace.total_latency_ms == 0.0:
                trace.total_latency_ms = (time.monotonic() - loop_start) * 1000
            self._record_graph_run(
                session=session,
                trace=trace,
                started_at=started_at,
                completed_at=time.time(),
            )

    async def _execute_tool(
        self,
        step: ToolStep,
        accumulated: Dict[str, Any],
        session: SessionContext,
    ) -> ToolResult:
        step_name = step.step_id
        tool_fn = self._tools.get(step_name)
        if tool_fn is None:
            return ToolResult(
                tool=step.tool, success=False, error=f"unregistered tool: {step_name}"
            )

        start = time.monotonic()
        try:
            execution_query = resolve_tool_query(step_name, accumulated)
            result_data = await asyncio.wait_for(
                tool_fn(
                    query=execution_query,
                    context=accumulated,
                    session=session,
                ),
                timeout=self._tool_timeout,
            )
            return ToolResult(
                tool=step.tool,
                success=True,
                data=result_data if isinstance(result_data, dict) else {"result": result_data},
                latency_ms=(time.monotonic() - start) * 1000,
            )
        except asyncio.TimeoutError:
            return ToolResult(
                tool=step.tool,
                success=False,
                error=f"tool {step_name} timed out ({self._tool_timeout}s)",
                latency_ms=(time.monotonic() - start) * 1000,
            )
        except Exception as exc:
            logger.error(f"[AgentLoop] tool {step_name} execution error: {exc}", exc_info=True)
            return ToolResult(
                tool=step.tool,
                success=False,
                error=str(exc),
                latency_ms=(time.monotonic() - start) * 1000,
            )

    @staticmethod
    def _build_tool_log_metadata(data: Dict[str, Any]) -> Dict[str, Any]:
        """Keep only a compact summary preview for the tool log."""
        metadata: Dict[str, Any] = {}
        if "count" in data:
            metadata["count"] = data["count"]
        if "query" in data:
            metadata["query"] = data["query"]
        if "results" in data and isinstance(data["results"], list):
            metadata["result_count"] = len(data["results"])
        if "text" in data:
            metadata["text_preview"] = str(data["text"])[:200]
        return metadata

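    # Example (hypothetical payload): given
    #   data = {"query": "parking", "results": [item1, item2, item3], "count": 3, "text": "..."}
    # _build_tool_log_metadata returns
    #   {"count": 3, "query": "parking", "result_count": 3, "text_preview": "..."}
    # i.e. only scalars and previews are logged, never the full result payload.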
    @staticmethod
    def _build_plan_summary(plan: Optional[ExecutionPlan]) -> str:
        if not plan:
            return ""

        tools = " -> ".join(step.step_id for step in plan.steps)
        if plan.reason:
            return f"{plan.reason} | tools: {tools}"
        return tools

    @staticmethod
    def _graph_run_status(trace: AgentTrace) -> str:
        if trace.error:
            return "failed"
        if any(not result.success for result in trace.tool_results):
            return "completed_with_errors"
        return "completed"

    @classmethod
    def _record_graph_run(
        cls,
        session: SessionContext,
        trace: AgentTrace,
        started_at: float,
        completed_at: float,
    ) -> None:
        success_count = sum(1 for result in trace.tool_results if result.success)
        failure_count = len(trace.tool_results) - success_count
        session.add_graph_run(
            request_id=trace.request_id,
            plan_summary=cls._build_plan_summary(trace.plan),
            approval_status="not_requested",
            executed_capabilities=[tool_name(result.tool) for result in trace.tool_results],
            status=cls._graph_run_status(trace),
            error=trace.error,
            total_latency_ms=trace.total_latency_ms,
            metadata={
                "plan_reason": trace.plan.reason if trace.plan else "",
                "tool_result_count": len(trace.tool_results),
                "success_count": success_count,
                "failure_count": failure_count,
                "final_text_preview": trace.final_text[:200],
            },
            started_at=started_at,
            completed_at=completed_at,
        )

    @staticmethod
    def _extract_final_text(accumulated: Dict[str, Any], plan: ExecutionPlan) -> str:
        for tool_type in (ToolType.APPEND_EVIDENCE, ToolType.DRAFT_CIVIL_RESPONSE):
            payload = accumulated.get(tool_type.value, {})
            if isinstance(payload, dict) and payload.get("text"):
                return str(payload["text"])

        for step in plan.steps:
            payload = accumulated.get(step.step_id, {})
            if isinstance(payload, dict) and payload.get("text"):
                return str(payload["text"])

        parts: List[str] = []

        rag_data = accumulated.get(ToolType.RAG_SEARCH.value, {})
        if rag_data.get("results"):
            lines = ["[Local document evidence]"]
            for item in rag_data["results"][:3]:
                title = item.get("title", "")
                content = item.get("content", "")[:120]
                lines.append(f"- {title}: {content}")
            parts.append("\n".join(lines))

        api_data = accumulated.get(ToolType.API_LOOKUP.value, {})
        if api_data.get("context_text"):
            parts.append(api_data["context_text"])
        elif api_data.get("results"):
            lines = ["[External lookup results]"]
            for item in api_data["results"][:3]:
                title = item.get("title", item.get("qnaTitle", ""))
                content = item.get("content", item.get("qnaContent", ""))[:120]
                lines.append(f"- {title}: {content}")
            parts.append("\n".join(lines))

        return "\n\n".join(parts) if parts else "The request could not be processed."
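A minimal driving sketch for the loop above. AgentLoop, run_stream, and the event types ("plan", "tool_start", "tool_result", "final", "error") come from this file; the session construction and print handling are illustrative assumptions:

    async def run_once(loop: AgentLoop, session: SessionContext, query: str) -> None:
        # Consume the streaming variant; "final" and "error" both carry finished=True.
        async for event in loop.run_stream(query, session):
            if event["type"] == "tool_result":
                print(event["tool"], event["success"], event["latency_ms"])
            elif event.get("finished"):
                print(event)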
src/inference/agent_manager.py
ADDED
@@ -0,0 +1,134 @@
"""
Agent Manager for multi-persona prompt construction.

Loads agent persona definitions from Markdown files with YAML frontmatter,
and builds EXAONE chat template prompts with the appropriate system message.

Issue: #56
"""

import os
import re
from typing import Dict, List, Optional

import yaml
from loguru import logger


class AgentPersona:
    """Parsed agent persona from a Markdown file."""

    def __init__(
        self,
        name: str,
        role: str,
        description: str,
        system_prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 512,
    ):
        self.name = name
        self.role = role
        self.description = description
        self.system_prompt = system_prompt
        self.temperature = temperature
        self.max_tokens = max_tokens

    def __repr__(self) -> str:
        return f"AgentPersona(name={self.name!r}, role={self.role!r})"


class AgentManager:
    """
    Loads and manages agent personas from Markdown files.

    Each agent file uses YAML frontmatter for configuration and Markdown body
    for the system prompt content.

    Usage:
        manager = AgentManager("agents/")
        persona = manager.get_agent("generator_civil_response")
        prompt = manager.build_prompt("generator_civil_response", "Please draft a civil complaint response.")
    """

    _FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n(.*)$", re.DOTALL)

    def __init__(self, agents_dir: str = "agents"):
        self.agents_dir = agents_dir
        self._agents: Dict[str, AgentPersona] = {}
        self._load_agents()

    _DANGEROUS_TOKENS = ["[|user|]", "[|assistant|]", "[|system|]", "[|endofturn|]"]

    def _load_agents(self) -> None:
        base_dir = os.path.realpath(self.agents_dir)
        if not os.path.isdir(base_dir):
            logger.warning(f"Agents directory not found: {base_dir}")
            return

        for filename in os.listdir(base_dir):
            if not filename.endswith(".md"):
                continue
            filepath = os.path.realpath(os.path.join(base_dir, filename))
            if not filepath.startswith(base_dir + os.sep):
                logger.warning(f"Path traversal attempt detected, ignoring: {filename}")
                continue
            try:
                agent = self._parse_agent_file(filepath)
                self._agents[agent.name] = agent
                logger.info(f"Loaded agent: {agent.name} ({agent.role})")
            except Exception as e:
                logger.error(f"Failed to load agent from {filename}: {e}")

    def _parse_agent_file(self, filepath: str) -> AgentPersona:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        match = self._FRONTMATTER_RE.match(content)
        if not match:
            raise ValueError(f"Invalid agent file format (missing YAML frontmatter): {filepath}")

        frontmatter = yaml.safe_load(match.group(1))
        body = match.group(2).strip()

        name = (frontmatter.get("name") or "").strip()
        if not name or not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name):
            raise ValueError(f"Invalid agent name: {name!r} in {filepath}")

        return AgentPersona(
            name=name,
            role=frontmatter.get("role", ""),
            description=frontmatter.get("description", ""),
            system_prompt=body,
            temperature=float(frontmatter.get("temperature", 0.7)),
            max_tokens=int(frontmatter.get("max_tokens", 512)),
        )

    def get_agent(self, name: str) -> Optional[AgentPersona]:
        return self._agents.get(name)

    def list_agents(self) -> List[str]:
        return list(self._agents.keys())

    def build_prompt(self, agent_name: str, user_message: str) -> str:
        """
        Build an EXAONE chat template prompt with the agent's system message.

        Format:
            [|system|]{system_prompt}[|endofturn|]
            [|user|]{user_message}[|endofturn|]
            [|assistant|]
        """
        agent = self._agents.get(agent_name)
        if agent is None:
            raise ValueError(f"Unknown agent: {agent_name}")

        for token in self._DANGEROUS_TOKENS:
            if token in user_message:
                raise ValueError(f"Unescaped special token detected: {token}")

        return (
            f"[|system|]{agent.system_prompt}[|endofturn|]"
            f"\n[|user|]{user_message}[|endofturn|]"
            f"\n[|assistant|]"
        )
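A sketch of a persona file this loader accepts plus the corresponding call. The frontmatter keys (name, role, description, temperature, max_tokens) and the body-as-system-prompt convention come from _parse_agent_file above; the concrete values are invented:

    # agents/generator_civil_response.md (hypothetical contents)
    # ---
    # name: generator_civil_response
    # role: response drafting
    # description: Drafts first-pass civil complaint replies
    # temperature: 0.3
    # max_tokens: 512
    # ---
    # (The Markdown body below the frontmatter becomes the system prompt.)

    manager = AgentManager("agents/")
    prompt = manager.build_prompt("generator_civil_response", "Draft a reply to the complaint below.")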
src/inference/api_server.py
ADDED
@@ -0,0 +1,1899 @@
import asyncio
import json
import os
import re
import time
import uuid
from contextlib import asynccontextmanager
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, List, Optional

from fastapi import Depends, FastAPI, HTTPException, Request, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.security import APIKeyHeader
from loguru import logger

try:
    from vllm import AsyncLLM, SamplingParams
except ImportError:
    try:
        from vllm.engine.async_llm_engine import AsyncLLMEngine as AsyncLLM
        from vllm.sampling_params import SamplingParams
    except ImportError:
        AsyncLLM = None
        SamplingParams = None

try:
    from vllm.lora.request import LoRARequest
except ImportError:
    LoRARequest = None

# Multi-LoRA adapter name -> numeric ID mapping (passed to vLLM's LoRARequest)
_LORA_ID_MAP: Dict[str, int] = {"civil": 1, "legal": 2}
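# --- Illustrative aside (not part of api_server.py) --------------------------
# A minimal sketch of how _LORA_ID_MAP feeds vLLM's LoRARequest, mirroring the
# construction used further down in this file; the adapter path here is a
# hypothetical example value.
if LoRARequest is not None:
    _example_lora = LoRARequest(
        "civil",                    # adapter name as served
        _LORA_ID_MAP["civil"],      # stable numeric ID vLLM uses internally
        "/models/adapters/civil",   # hypothetical local adapter path
    )
# ------------------------------------------------------------------------------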
from .agent_loop import AgentLoop, AgentTrace
from .agent_manager import AgentManager
from .bm25_indexer import BM25Indexer
from .feature_flags import FeatureFlags
from .hybrid_search import HybridSearchEngine, SearchMode
from .index_manager import IndexType, MultiIndexManager
from .retriever import CivilComplaintRetriever
from .runtime_config import RuntimeConfig
from .schemas import (
    AgentRunRequest,
    AgentRunResponse,
    AgentTraceSchema,
    GenerateCivilResponseRequest,
    GenerateCivilResponseResponse,
    GenerateRequest,
    GenerateResponse,
    RetrievedCase,
    SearchRequest,
    SearchResponse,
    SearchResult,
    ToolResultSchema,
)
from .session_context import SessionContext, SessionStore
from .tool_router import ToolType, tool_name

SessionLocal = None
LocalDocumentIndexer = None
SKIP_MODEL_LOAD = os.getenv("SKIP_MODEL_LOAD", "false").lower() in ("true", "1", "yes")


async def _noop_tool(query: str, context: dict, session: Any) -> dict:
    """No-op tool used as the build_mvp_registry fallback."""
    return {"success": False, "error": "tool이 초기화되지 않았습니다"}


if not SKIP_MODEL_LOAD:
    try:
        from vllm.engine.arg_utils import AsyncEngineArgs

        from .vllm_stabilizer import apply_transformers_patch
    except ImportError:
        logger.warning("vllm modules not found. Model loading will fail if attempted.")
        AsyncEngineArgs = object
        apply_transformers_patch = lambda: None

try:
    from slowapi import Limiter
    from slowapi.middleware import SlowAPIMiddleware
    from slowapi.util import get_remote_address

    limiter = Limiter(key_func=get_remote_address)
    _RATE_LIMIT_AVAILABLE = True
except ImportError:
    limiter = None
    _RATE_LIMIT_AVAILABLE = False

_API_KEY = os.getenv("API_KEY")
_api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)


async def verify_api_key(api_key: str = Security(_api_key_header)):
    if _API_KEY is None:
        return
    if api_key != _API_KEY:
        raise HTTPException(status_code=401, detail="유효하지 않은 API 키입니다.")
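# --- Illustrative aside (not part of api_server.py) --------------------------
# What the X-API-Key dependency above expects from a client. Host, port, key
# and body are hypothetical; /v1/generate is defined later in this file, and
# the sketch assumes `prompt` is the only field that must be supplied.
def _example_authenticated_call():
    import urllib.request

    req = urllib.request.Request(
        "http://127.0.0.1:8000/v1/generate",
        data=json.dumps({"prompt": "도로 보수 요청 민원입니다."}).encode("utf-8"),
        headers={"Content-Type": "application/json", "X-API-Key": "my-secret-key"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
# ------------------------------------------------------------------------------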
runtime_config = RuntimeConfig.from_env()
runtime_config.log_summary()

MODEL_PATH = runtime_config.model.model_path
DATA_PATH = runtime_config.paths.data_path
INDEX_PATH = runtime_config.paths.index_path
GPU_UTILIZATION = runtime_config.gpu_utilization
MAX_MODEL_LEN = runtime_config.max_model_len
TRUST_REMOTE_CODE = runtime_config.model.trust_remote_code
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent.parent)
AGENTS_DIR = runtime_config.paths.agents_dir


@dataclass
class PreparedGeneration:
    prompt: str
    sampling_params: SamplingParams
    retrieved_cases: List[dict]
    search_results: List[SearchResult]


if not SKIP_MODEL_LOAD:
    apply_transformers_patch()


def _extract_content_by_type(result: dict, index_type: IndexType) -> str:
    extras = result.get("extras", {})
    if index_type == IndexType.CASE:
        case_text = "\n".join(
            part
            for part in (extras.get("complaint_text", ""), extras.get("answer_text", ""))
            if part
        ).strip()
        text = case_text or extras.get("content", "") or extras.get("chunk_text", "")
    elif index_type == IndexType.LAW:
        text = (
            extras.get("law_text", "") or extras.get("content", "") or extras.get("chunk_text", "")
        )
    elif index_type == IndexType.MANUAL:
        text = (
            extras.get("manual_text", "")
            or extras.get("content", "")
            or extras.get("chunk_text", "")
        )
    elif index_type == IndexType.NOTICE:
        text = (
            extras.get("notice_text", "")
            or extras.get("content", "")
            or extras.get("chunk_text", "")
        )
    else:
        text = ""
    return text or result.get("title", "")
def _extract_approval_request(graph_state: Any) -> Any:
    """Extract the approval payload from a LangGraph interrupt state."""
    if not graph_state or not getattr(graph_state, "tasks", None):
        return None
    task = graph_state.tasks[0]
    if not getattr(task, "interrupts", None):
        return None
    return task.interrupts[0].value


class vLLMEngineManager:
    """Local runtime manager for the GovOn Shell MVP."""

    def __init__(self):
        self.engine: AsyncLLM = None
        self.retriever: CivilComplaintRetriever = None
        self.index_manager: Optional[MultiIndexManager] = None
        self.hybrid_engine: Optional[HybridSearchEngine] = None
        self.bm25_indexers: dict[IndexType, BM25Indexer] = {}
        self.embed_model = None
        self.feature_flags = FeatureFlags.from_env()
        self.session_store = SessionStore()
        self.agent_manager = AgentManager(AGENTS_DIR)
        self.agent_loop: Optional[AgentLoop] = None
        self.graph = None  # LangGraph CompiledGraph (for the v2 endpoints)
        self.local_document_indexer: Optional[Any] = None
        self.local_document_sync_status: Optional[Dict[str, Any]] = None
        self._local_document_sync_task: Optional[asyncio.Task] = None
        self._checkpointer_ctx = None  # AsyncSqliteSaver context manager (managed by lifespan)
        self._sync_checkpointer_conn = None  # sqlite3 connection for SqliteSaver (prevents leaks)
        self._init_agent_loop()
        self._init_graph()

    async def initialize(self):
        if SKIP_MODEL_LOAD:
            logger.info("SKIP_MODEL_LOAD=true: 모델 및 인덱스 로딩을 건너뜁니다.")
            return

        logger.info(f"Initializing vLLM runtime with model: {MODEL_PATH}")
        # Enable native tool calling for EXAONE 4.0-32B-AWQ:
        # --enable-auto-tool-choice --tool-call-parser hermes
        # For Multi-LoRA serving, add the --enable-lora --lora-modules options.
        # Roughly a ~20GB footprint on a HuggingFace Spaces L4 (24GB VRAM).
        try:
            engine_kwargs = dict(
                model=MODEL_PATH,
                trust_remote_code=TRUST_REMOTE_CODE,
                gpu_memory_utilization=GPU_UTILIZATION,
                max_model_len=MAX_MODEL_LEN,
                dtype=runtime_config.model.dtype,
                enforce_eager=runtime_config.model.enforce_eager,
            )
            # Multi-LoRA serving: enabled when adapter_paths is configured.
            lora_enabled = bool(runtime_config.model.adapter_paths)
            if lora_enabled:
                engine_kwargs.update(
                    enable_lora=True,
                    max_loras=4,
                    max_lora_rank=64,
                )
                logger.info(
                    f"Multi-LoRA 활성화: adapters={list(runtime_config.model.adapter_paths.keys())}"
                )
            engine_args = AsyncEngineArgs(**engine_kwargs)
            if hasattr(AsyncLLM, "from_engine_args"):
                self.engine = AsyncLLM.from_engine_args(engine_args)
            else:
                self.engine = AsyncLLM(engine_args)
        except Exception as exc:
            logger.error(f"vLLM 엔진 초기화 실패: {exc}")
            raise

        logger.info(f"Initializing retriever with index: {INDEX_PATH}")
        self.retriever = CivilComplaintRetriever(
            index_path=INDEX_PATH if os.path.exists(INDEX_PATH) else None,
            data_path=DATA_PATH if not os.path.exists(INDEX_PATH) else None,
        )
        if self.retriever.index is not None and not os.path.exists(INDEX_PATH):
            self.retriever.save_index(INDEX_PATH)

        faiss_index_dir = runtime_config.paths.faiss_index_dir
        local_docs_root = runtime_config.paths.local_docs_root
        if os.path.isdir(faiss_index_dir) or local_docs_root:
            self.index_manager = MultiIndexManager(base_dir=faiss_index_dir)
            logger.info(f"MultiIndexManager 초기화 완료: {faiss_index_dir}")
        else:
            logger.warning(f"FAISS 인덱스 디렉터리 미존재: {faiss_index_dir}")

        bm25_index_dir = os.getenv("BM25_INDEX_DIR", "models/bm25_index")
        if os.path.isdir(bm25_index_dir):
            for idx_type in IndexType:
                bm25_path = os.path.join(bm25_index_dir, f"{idx_type.value}.pkl")
                if not os.path.exists(bm25_path):
                    continue
                try:
                    indexer = BM25Indexer()
                    indexer.load(bm25_path)
                    self.bm25_indexers[idx_type] = indexer
                    logger.info(f"BM25 인덱스 로드 완료: {idx_type.value} ({indexer.doc_count}건)")
                except Exception as exc:
                    logger.warning(f"BM25 인덱스 로드 실패 ({idx_type.value}): {exc}")

        if self.retriever and hasattr(self.retriever, "model"):
            self.embed_model = self.retriever.model

        if self.index_manager and self.embed_model:
            self.hybrid_engine = HybridSearchEngine(
                index_manager=self.index_manager,
                bm25_indexers=self.bm25_indexers,
                embed_model=self.embed_model,
            )
            logger.info("HybridSearchEngine 초기화 완료")
            self._schedule_local_document_sync()
        else:
            logger.warning("HybridSearchEngine 미초기화: index_manager 또는 embed_model 없음")

    def _schedule_local_document_sync(self) -> None:
        indexer = self._build_local_document_indexer()
        if indexer is None:
            return
        if self._local_document_sync_task and not self._local_document_sync_task.done():
            return

        self.local_document_sync_status = {
            "status": "syncing",
            "root_dir": str(indexer.root_dir),
            "source_name": indexer.source_name,
        }
        self._local_document_sync_task = asyncio.create_task(self._sync_local_documents_async())

    async def _sync_local_documents_async(self) -> Optional[Dict[str, Any]]:
        try:
            return await asyncio.to_thread(self.sync_local_documents)
        except Exception as exc:
            logger.error(f"백그라운드 로컬 문서 인덱싱 실패: {exc}", exc_info=True)
            if self.local_document_indexer is None:
                return None
            self.local_document_sync_status = {
                "status": "error",
                "root_dir": str(self.local_document_indexer.root_dir),
                "source_name": self.local_document_indexer.source_name,
                "error": str(exc),
            }
            return self.local_document_sync_status

    def _build_local_document_indexer(self) -> Optional[Any]:
        global SessionLocal, LocalDocumentIndexer

        root_dir = runtime_config.paths.local_docs_root
        if not root_dir:
            return None
        if self.index_manager is None or self.embed_model is None:
            logger.warning(
                "LOCAL_DOCS_ROOT가 설정됐지만 index_manager 또는 embed_model이 없습니다."
            )
            return None
        if self.local_document_indexer is None:
            if SessionLocal is None:
                from .db.database import SessionLocal as _SessionLocal

                SessionLocal = _SessionLocal
            if LocalDocumentIndexer is None:
                from .local_document_indexer import LocalDocumentIndexer as _LocalDocumentIndexer

                LocalDocumentIndexer = _LocalDocumentIndexer

            self.local_document_indexer = LocalDocumentIndexer(
                root_dir=root_dir,
                index_manager=self.index_manager,
                embed_model=self.embed_model,
                session_factory=SessionLocal,
            )
        return self.local_document_indexer

    def sync_local_documents(self) -> Optional[Dict[str, Any]]:
        indexer = self._build_local_document_indexer()
        if indexer is None:
            return None

        try:
            summary = indexer.sync()
        except Exception as exc:
            logger.error(f"로컬 문서 인덱싱 실패: {exc}", exc_info=True)
            self.local_document_sync_status = {
                "status": "error",
                "root_dir": str(indexer.root_dir),
                "source_name": indexer.source_name,
                "error": str(exc),
            }
            return self.local_document_sync_status

        self.local_document_sync_status = {
            "status": "ok",
            "root_dir": str(indexer.root_dir),
            "source_name": indexer.source_name,
            **asdict(summary),
        }
        logger.info(
            "로컬 문서 인덱싱 완료: "
            f"root={indexer.root_dir}, scanned={summary.scanned_files}, "
            f"indexed={summary.indexed_files}, unchanged={summary.unchanged_files}, "
            f"removed={summary.removed_files}"
        )
        return self.local_document_sync_status

    def _escape_special_tokens(self, text: str) -> str:
        tokens = [
            "[|user|]",
            "[|assistant|]",
            "[|system|]",
            "[|endofturn|]",
            "<thought>",
            "</thought>",
        ]
        for token in tokens:
            text = text.replace(
                token,
                token.replace("[", "\\[")
                .replace("]", "\\]")
                .replace("<", "\\<")
                .replace(">", "\\>"),
            )
        return text

    @staticmethod
    def _strip_thought_blocks(text: str) -> str:
        return re.sub(r"<thought>.*?</thought>\s*", "", text, flags=re.DOTALL).strip()
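    # --- Illustrative aside (not part of api_server.py) ----------------------
    # What _strip_thought_blocks does to raw model output, using the same
    # regex as the method above; the sample string is made up. This statement
    # runs once at class-definition time.
    _EXAMPLE_CLEANED = re.sub(
        r"<thought>.*?</thought>\s*",
        "",
        "<thought>내부 추론</thought>민원 답변 본문입니다.",
        flags=re.DOTALL,
    ).strip()  # -> "민원 답변 본문입니다."
    # --------------------------------------------------------------------------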
    def _build_rag_context(self, retrieved_cases: List[dict]) -> str:
        if not retrieved_cases:
            return ""
        rag_context = "### 참고 사례 (유사 민원 및 답변):\n"
        for i, case in enumerate(retrieved_cases, start=1):
            complaint = self._escape_special_tokens(case.get("complaint", ""))
            answer = self._escape_special_tokens(case.get("answer", ""))
            rag_context += f"{i}. [민원]: {complaint}\n [답변]: {answer}\n\n"
        return rag_context

    def _augment_prompt(self, prompt: str, retrieved_cases: List[dict]) -> str:
        rag_context = self._build_rag_context(retrieved_cases)
        if not rag_context:
            return prompt
        user_tag = "[|user|]"
        if user_tag in prompt:
            return prompt.replace(user_tag, f"{user_tag}{rag_context}\n", 1)
        return f"{rag_context}\n{prompt}"
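    # --- Illustrative aside (not part of api_server.py) ----------------------
    # What _augment_prompt produces: the RAG block is spliced in right after
    # the first [|user|] tag of an EXAONE-style chat prompt. Sample values are
    # made up; this runs once at class-definition time.
    _EXAMPLE_AUGMENTED = (
        "[|system|]안내[|endofturn|]\n[|user|]가로등 고장 신고[|endofturn|]"
    ).replace(
        "[|user|]",
        "[|user|]### 참고 사례 (유사 민원 및 답변):\n1. [민원]: ...\n [답변]: ...\n\n\n",
        1,
    )
    # --------------------------------------------------------------------------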
    def _build_search_result_context(self, search_results: List[SearchResult], heading: str) -> str:
        if not search_results:
            return ""

        lines = [heading]
        for index, result in enumerate(search_results, start=1):
            safe_title = self._escape_special_tokens(result.title)
            safe_content = self._escape_special_tokens(result.content[:300])
            lines.append(f"{index}. [{result.source_type.value}] {safe_title}")
            lines.append(f" 근거: {safe_content}")
        return "\n".join(lines)

    def _build_persona_prompt(self, agent_name: str, user_message: str) -> str:
        if self.agent_manager and self.agent_manager.get_agent(agent_name):
            return self.agent_manager.build_prompt(agent_name, user_message)
        return user_message

    def _extract_query(self, prompt: str) -> str:
        user_match = re.search(r"\[\|user\|\](.*?)\[\|endofturn\|\]", prompt, re.DOTALL)
        if user_match:
            user_block = user_match.group(1)
            complaint_match = re.search(r"민원\s*내용\s*:\s*(.+)", user_block, re.DOTALL)
            if complaint_match:
                return complaint_match.group(1).strip()
            return user_block.strip()
        return prompt
|
| 432 |
+
def _search_results_to_cases(self, search_results: List[SearchResult]) -> List[dict]:
|
| 433 |
+
retrieved_cases: List[dict] = []
|
| 434 |
+
for result in search_results:
|
| 435 |
+
if result.source_type != IndexType.CASE:
|
| 436 |
+
continue
|
| 437 |
+
metadata = result.metadata or {}
|
| 438 |
+
complaint = (
|
| 439 |
+
metadata.get("complaint_text") or metadata.get("complaint") or result.content
|
| 440 |
+
)
|
| 441 |
+
answer = metadata.get("answer_text") or metadata.get("answer") or result.content
|
| 442 |
+
retrieved_cases.append(
|
| 443 |
+
{
|
| 444 |
+
"id": result.doc_id,
|
| 445 |
+
"category": metadata.get("category", ""),
|
| 446 |
+
"complaint": complaint,
|
| 447 |
+
"answer": answer,
|
| 448 |
+
"score": result.score,
|
| 449 |
+
}
|
| 450 |
+
)
|
| 451 |
+
return retrieved_cases
|
| 452 |
+
|
| 453 |
+
@staticmethod
|
| 454 |
+
def _is_evidence_request(query: str) -> bool:
|
| 455 |
+
return any(token in query for token in ("๊ทผ๊ฑฐ", "์ถ์ฒ", "์", "์ด์ ", "๋งํฌ"))
|
| 456 |
+
|
| 457 |
+
@staticmethod
|
| 458 |
+
def _is_revision_request(query: str) -> bool:
|
| 459 |
+
return any(token in query for token in ("๋ค์", "์์ ", "๊ณ ์ณ", "์ ์ค", "๊ณต์", "๋ณด๊ฐ"))
|
| 460 |
+
|
| 461 |
+
def _latest_prior_turns(
|
| 462 |
+
self,
|
| 463 |
+
session: SessionContext,
|
| 464 |
+
current_query: str,
|
| 465 |
+
) -> tuple[Optional[str], Optional[str]]:
|
| 466 |
+
turns = list(session.recent_history)
|
| 467 |
+
if turns and turns[-1].role == "user" and turns[-1].content == current_query:
|
| 468 |
+
turns = turns[:-1]
|
| 469 |
+
|
| 470 |
+
previous_user = next(
|
| 471 |
+
(turn.content for turn in reversed(turns) if turn.role == "user"), None
|
| 472 |
+
)
|
| 473 |
+
previous_assistant = next(
|
| 474 |
+
(turn.content for turn in reversed(turns) if turn.role == "assistant"),
|
| 475 |
+
None,
|
| 476 |
+
)
|
| 477 |
+
return previous_user, previous_assistant
|
| 478 |
+
|
| 479 |
+
def _build_working_query(self, query: str, session: SessionContext) -> str:
|
| 480 |
+
query = query.strip()
|
| 481 |
+
if not query:
|
| 482 |
+
return query
|
| 483 |
+
|
| 484 |
+
if not (self._is_evidence_request(query) or self._is_revision_request(query)):
|
| 485 |
+
return query
|
| 486 |
+
|
| 487 |
+
previous_user, previous_assistant = self._latest_prior_turns(session, query)
|
| 488 |
+
parts: List[str] = []
|
| 489 |
+
if previous_user:
|
| 490 |
+
parts.append(f"์๋ ์์ฒญ: {previous_user}")
|
| 491 |
+
if previous_assistant:
|
| 492 |
+
parts.append(f"์ด์ ๋ต๋ณ: {previous_assistant[:600]}")
|
| 493 |
+
|
| 494 |
+
if self._is_revision_request(query):
|
| 495 |
+
parts.append(f"์์ ์์ฒญ: {query}")
|
| 496 |
+
|
| 497 |
+
return "\n\n".join(parts) if parts else query
|
| 498 |
+
|
    async def _retrieve_search_results(
        self,
        query: str,
        index_types: List[IndexType],
        top_k_per_type: int = 2,
    ) -> List[SearchResult]:
        if not query.strip():
            return []

        collected: List[SearchResult] = []

        if self.hybrid_engine:

            async def _search_index(index_type: IndexType) -> List[SearchResult]:
                results_raw, _ = await self.hybrid_engine.search(
                    query=query,
                    index_type=index_type,
                    top_k=top_k_per_type,
                    mode=SearchMode.HYBRID,
                )
                return [
                    SearchResult(
                        doc_id=item.get("doc_id", ""),
                        source_type=IndexType(item.get("doc_type", index_type.value)),
                        title=item.get("title", ""),
                        content=_extract_content_by_type(item, index_type),
                        score=item.get("score", 0.0),
                        reliability_score=item.get("reliability_score", 1.0),
                        metadata=item.get("extras", {}),
                        chunk_index=item.get("chunk_index", 0),
                        total_chunks=item.get("chunk_total", 1),
                    )
                    for item in results_raw
                ]

            grouped = await asyncio.gather(
                *[_search_index(index_type) for index_type in index_types],
                return_exceptions=True,
            )
            for result in grouped:
                if isinstance(result, BaseException):
                    logger.warning(f"로컬 검색 실패: {result}")
                    continue
                collected.extend(result)

        elif self.retriever and IndexType.CASE in index_types:
            for raw in self.retriever.search(query, top_k=max(3, top_k_per_type)):
                collected.append(
                    SearchResult(
                        doc_id=raw.get("id", raw.get("doc_id", "")),
                        source_type=IndexType.CASE,
                        title=raw.get("category", "유사 민원 사례"),
                        content=(raw.get("complaint", "") + "\n" + raw.get("answer", "")).strip(),
                        score=raw.get("score", 0.0),
                        reliability_score=raw.get("reliability_score", 1.0),
                        metadata={
                            "complaint": raw.get("complaint", ""),
                            "answer": raw.get("answer", ""),
                            "category": raw.get("category", ""),
                        },
                    )
                )

        return collected
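    # --- Illustrative aside (not part of api_server.py) ----------------------
    # The fan-out pattern used above, in isolation: one search task per index
    # type, gathered with return_exceptions=True so a single failing index
    # only degrades that index's results instead of failing the whole query.
    # `search_one` is a hypothetical async callable(index_type) -> list.
    @staticmethod
    async def _example_fan_out(search_one, index_types):
        grouped = await asyncio.gather(
            *[search_one(t) for t in index_types], return_exceptions=True
        )
        collected = []
        for result in grouped:
            if isinstance(result, BaseException):
                continue  # tolerate per-index failures
            collected.extend(result)
        return collected
    # --------------------------------------------------------------------------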
    @staticmethod
    def _format_evidence_items(evidence_dict: Dict[str, Any]) -> str:
        """Render an EvidenceEnvelope dict into a source-list text block.

        When EvidenceItems are present they are handled by a single formatter,
        without source-specific branching.
        """
        items = evidence_dict.get("items", [])
        if not items:
            return ""

        lines: list[str] = []
        for idx, item in enumerate(items[:10], start=1):
            source_type = item.get("source_type", "")
            title = item.get("title", "")
            link = item.get("link_or_path", "")
            page = item.get("page")

            if source_type == "rag":
                loc = link or title or "로컬 문서"
                if page:
                    lines.append(f"[{idx}] {loc} (p.{page})")
                else:
                    lines.append(f"[{idx}] {loc}")
            elif source_type == "api":
                label = title or "외부 API 결과"
                if link:
                    lines.append(f"[{idx}] {label} - {link}")
                else:
                    lines.append(f"[{idx}] {label}")
            else:
                label = title or "생성 참조"
                if link:
                    lines.append(f"[{idx}] {label} - {link}")
                else:
                    lines.append(f"[{idx}] {label}")

        return "\n".join(lines)

    def _summarize_evidence(
        self,
        search_results: List[SearchResult],
        api_lookup_data: Dict[str, Any],
    ) -> str:
        # Prefer the EvidenceEnvelope when present.
        evidence = api_lookup_data.get("evidence")
        if isinstance(evidence, dict) and evidence.get("items"):
            lines = ["근거 요약"]
            rag_items = [i for i in evidence["items"] if i.get("source_type") == "rag"]
            api_items = [i for i in evidence["items"] if i.get("source_type") == "api"]
            if rag_items:
                titles = ", ".join(i["title"] for i in rag_items[:3] if i.get("title"))
                lines.append(
                    f"- 로컬 문서 {len(rag_items)}건을 참고했습니다."
                    + (f" 주요 문서: {titles}" if titles else "")
                )
            if api_items:
                titles = ", ".join(i["title"] for i in api_items[:3] if i.get("title"))
                lines.append(
                    f"- 외부 민원분석 API에서 유사 사례 {len(api_items)}건을 확인했습니다."
                    + (f" 대표 사례: {titles}" if titles else "")
                )
            if len(lines) == 1:
                lines.append(
                    "- 내부 검색 결과를 충분히 확보하지 못해 일반 행정 응대 수칙 기준으로 작성했습니다."
                )
            return "\n".join(lines)

        # Legacy formatter (when no EvidenceItem is available).
        lines = ["근거 요약"]

        if search_results:
            titles = ", ".join(result.title for result in search_results[:3] if result.title)
            lines.append(
                f"- 로컬 문서 {len(search_results)}건을 참고했습니다."
                + (f" 주요 문서: {titles}" if titles else "")
            )

        api_results = api_lookup_data.get("results", [])
        if api_results:
            titles = []
            for item in api_results[:3]:
                title = item.get("title") or item.get("qnaTitle") or item.get("question")
                if title:
                    titles.append(title)
            lines.append(
                f"- 외부 민원분석 API에서 유사 사례 {len(api_results)}건을 확인했습니다."
                + (f" 대표 사례: {', '.join(titles)}" if titles else "")
            )

        if len(lines) == 1:
            lines.append(
                "- 내부 검색 결과를 충분히 확보하지 못해 일반 행정 응대 수칙 기준으로 작성했습니다."
            )

        return "\n".join(lines)

    @staticmethod
    def _rag_source_line(index: int, item: Dict[str, Any]) -> str:
        metadata = item.get("metadata", {}) or {}
        location = (
            metadata.get("file_path")
            or metadata.get("source_path")
            or metadata.get("path")
            or metadata.get("source")
            or item.get("title")
            or item.get("doc_id")
            or "로컬 문서"
        )
        page = metadata.get("page") or metadata.get("page_number") or metadata.get("page_no")
        if page:
            return f"[{index}] {location} (p.{page})"
        return f"[{index}] {location}"

    @staticmethod
    def _api_source_line(index: int, item: Dict[str, Any]) -> str:
        title = item.get("title") or item.get("qnaTitle") or item.get("question") or "외부 API 결과"
        url = item.get("url") or item.get("detailUrl") or ""
        if url:
            return f"[{index}] {title} - {url}"
        return f"[{index}] {title}"

    def _build_evidence_section(
        self,
        session: SessionContext,
        current_query: str,
        rag_data: Dict[str, Any],
        api_data: Dict[str, Any],
    ) -> str:
        _, previous_answer = self._latest_prior_turns(session, current_query)
        lines = ["근거/출처"]
        cursor = 1

        # When an EvidenceEnvelope is present, prefer the single formatter.
        rag_evidence = rag_data.get("evidence")
        api_evidence = api_data.get("evidence")

        if rag_evidence and isinstance(rag_evidence, dict) and rag_evidence.get("items"):
            for item in rag_evidence["items"][:5]:
                source_type = item.get("source_type", "rag")
                if source_type == "rag":
                    link = item.get("link_or_path", "")
                    page = item.get("page")
                    loc = link or item.get("title", "") or "로컬 문서"
                    if page:
                        lines.append(f"[{cursor}] {loc} (p.{page})")
                    else:
                        lines.append(f"[{cursor}] {loc}")
                    cursor += 1
        else:
            # Legacy RAG formatter.
            for item in rag_data.get("results", [])[:5]:
                lines.append(self._rag_source_line(cursor, item))
                cursor += 1

        if api_evidence and isinstance(api_evidence, dict) and api_evidence.get("items"):
            for item in api_evidence["items"][:5]:
                title = item.get("title", "") or "외부 API 결과"
                link = item.get("link_or_path", "")
                if link:
                    lines.append(f"[{cursor}] {title} - {link}")
                else:
                    lines.append(f"[{cursor}] {title}")
                cursor += 1
        else:
            # Legacy API formatter.
            api_items = api_data.get("citations") or api_data.get("results") or []
            for item in api_items[:5]:
                lines.append(self._api_source_line(cursor, item))
                cursor += 1

        if cursor == 1:
            lines.append("- 검색 가능한 근거를 찾지 못했습니다.")

        section = "\n".join(lines)
        if previous_answer:
            return f"{previous_answer}\n\n{section}"
        return section
    async def _prepare_civil_response_generation(
        self,
        request: GenerateCivilResponseRequest,
        flags: Optional[FeatureFlags] = None,
        external_cases: Optional[List[dict]] = None,
    ) -> PreparedGeneration:
        effective_flags = flags or self.feature_flags
        query = self._escape_special_tokens(self._extract_query(request.prompt))
        search_results: List[SearchResult] = []

        if request.use_rag and effective_flags.use_rag_pipeline:
            search_results = await self._retrieve_search_results(
                query,
                [IndexType.CASE, IndexType.LAW, IndexType.MANUAL, IndexType.NOTICE],
            )

        retrieved_cases = self._search_results_to_cases(search_results)
        if external_cases:
            retrieved_cases.extend(external_cases)

        safe_message = self._escape_special_tokens(request.prompt)
        sections = []
        if search_results:
            sections.append(
                self._build_search_result_context(
                    search_results,
                    "### 민원 답변 참고 자료 (사례/법률/매뉴얼/공시정보):",
                )
            )
        if retrieved_cases:
            sections.append(self._build_rag_context(retrieved_cases[:5]))
            sections.append(
                "위 근거를 바탕으로 민원인의 불편에 공감하고, 현재 조치 상황과 처리 절차를 포함한 행정 초안을 작성하세요."
            )
        sections.append(safe_message)
        augmented_prompt = self._build_persona_prompt(
            "generator_civil_response",
            "\n\n".join(section for section in sections if section),
        )

        gen_defaults = runtime_config.generation
        sampling_params = SamplingParams(
            temperature=request.temperature,
            top_p=request.top_p,
            max_tokens=request.max_tokens,
            stop=request.stop or gen_defaults.stop_sequences,
            repetition_penalty=gen_defaults.repetition_penalty,
        )

        return PreparedGeneration(
            prompt=augmented_prompt,
            sampling_params=sampling_params,
            retrieved_cases=retrieved_cases[:5],
            search_results=search_results,
        )

    async def _run_engine(
        self,
        prompt: str,
        sampling_params: SamplingParams,
        request_id: str,
        lora_request=None,
    ):
        if self.engine is None:
            return None

        result = self.engine.generate(
            prompt, sampling_params, request_id, lora_request=lora_request
        )
        if hasattr(result, "__aiter__"):
            final_output = None
            async for output in result:
                final_output = output
            return final_output
        return await result

    async def generate(
        self,
        request: GenerateRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
    ) -> tuple[Any, List[dict]]:
        output, retrieved_cases, _ = await self.generate_civil_response(request, request_id, flags)
        return output, retrieved_cases

    async def generate_civil_response(
        self,
        request: GenerateCivilResponseRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
        external_cases: Optional[List[dict]] = None,
        lora_request=None,
    ) -> tuple[Any, List[dict], List[SearchResult]]:
        prepared = await self._prepare_civil_response_generation(request, flags, external_cases)
        output = await self._run_engine(
            prepared.prompt, prepared.sampling_params, request_id, lora_request=lora_request
        )
        return output, prepared.retrieved_cases, prepared.search_results

    async def generate_stream(
        self,
        request: GenerateRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
    ) -> tuple[Any, List[dict], List[SearchResult]]:
        prepared = await self._prepare_civil_response_generation(request, flags)
        if self.engine is None:
            raise RuntimeError("모델 엔진이 초기화되지 않았습니다.")
        if hasattr(self.engine, "stream"):
            stream = self.engine.stream(prepared.prompt, prepared.sampling_params, request_id)
        else:
            stream = self.engine.generate(prepared.prompt, prepared.sampling_params, request_id)
        return stream, prepared.retrieved_cases, prepared.search_results
    def _init_agent_loop(self) -> None:
        from src.inference.actions.data_go_kr import MinwonAnalysisAction

        engine_ref = self
        minwon_action = MinwonAnalysisAction()

        async def _rag_search_tool(query: str, context: dict, session: SessionContext) -> dict:
            working_query = query.strip()
            search_results = await engine_ref._retrieve_search_results(
                working_query,
                [IndexType.CASE, IndexType.LAW, IndexType.MANUAL, IndexType.NOTICE],
            )
            return {
                "query": working_query,
                "count": len(search_results),
                "results": [result.model_dump() for result in search_results],
                "context_text": engine_ref._build_search_result_context(
                    search_results,
                    "### 로컬 문서 검색 결과:",
                ),
            }

        async def _api_lookup_tool(query: str, context: dict, session: SessionContext) -> dict:
            working_query = query.strip()
            payload = await minwon_action.fetch_similar_cases(
                working_query,
                {
                    **context,
                    "session_context": session.build_context_summary(),
                },
            )
            results = payload["results"] or []
            return {
                "query": payload["query"],
                "count": len(results),
                "results": results,
                "context_text": payload["context_text"],
                "citations": [citation.to_dict() for citation in payload["citations"]],
                "source": "data.go.kr",
            }

        async def _draft_civil_response_tool(
            query: str,
            context: dict,
            session: SessionContext,
        ) -> dict:
            working_query = engine_ref._build_working_query(query, session)
            api_lookup_data = context.get(ToolType.API_LOOKUP.value, {})

            external_cases = []
            for item in api_lookup_data.get("results", [])[:3]:
                complaint = (
                    item.get("content") or item.get("qnaContent") or item.get("question", "")
                )
                answer = item.get("answer") or item.get("qnaAnswer") or item.get("title", "")
                if complaint or answer:
                    external_cases.append(
                        {
                            "complaint": complaint,
                            "answer": answer,
                            "score": float(item.get("score", 0.0)),
                        }
                    )

            # Multi-LoRA: build a LoRARequest when the civil adapter is configured.
            civil_adapter_path = runtime_config.model.adapter_paths.get("civil")
            lora_req = None
            if civil_adapter_path and LoRARequest is not None:
                lora_req = LoRARequest("civil", _LORA_ID_MAP["civil"], civil_adapter_path)

            gen_request = GenerateCivilResponseRequest(
                prompt=working_query,
                max_tokens=512,
                temperature=0.7,
                use_rag=True,
            )
            request_id = str(uuid.uuid4())
            final_output, retrieved_cases, search_results = (
                await engine_ref.generate_civil_response(
                    gen_request,
                    request_id,
                    external_cases=external_cases,
                    lora_request=lora_req,
                )
            )
            if final_output is None:
                return {"text": "", "error": "민원 답변 생성 실패"}

            draft_text = engine_ref._strip_thought_blocks(final_output.outputs[0].text)
            text = (
                engine_ref._summarize_evidence(search_results, api_lookup_data)
                + "\n\n최종 초안\n"
                + draft_text
            )
            return {
                "text": text,
                "draft_text": draft_text,
                "retrieved_cases": retrieved_cases,
                "search_results": [result.model_dump() for result in search_results],
                "prompt_tokens": len(final_output.prompt_token_ids),
                "completion_tokens": len(final_output.outputs[0].token_ids),
            }

        async def _append_evidence_tool(
            query: str,
            context: dict,
            session: SessionContext,
        ) -> dict:
            rag_data = context.get(ToolType.RAG_SEARCH.value, {})
            api_data = context.get(ToolType.API_LOOKUP.value, {})

            # Existing evidence text (kept as the fallback).
            fallback_text = engine_ref._build_evidence_section(session, query, rag_data, api_data)

            # Try to enrich the evidence section with the LLM.
            enhanced_text = fallback_text
            if engine_ref.engine is not None:
                try:
                    _, previous_answer = engine_ref._latest_prior_turns(session, query)
                    existing_response = engine_ref._escape_special_tokens(previous_answer or "")
                    rag_context = engine_ref._escape_special_tokens(
                        rag_data.get("context_text", "")
                    )
                    api_context = ""
                    for item in api_data.get("results", [])[:3]:
                        title = item.get("title", "")
                        content = item.get("content", "") or item.get("qnaContent", "")
                        if title or content:
                            api_context += (
                                f"- {engine_ref._escape_special_tokens(title)}"
                                f": {engine_ref._escape_special_tokens(content[:200])}\n"
                            )

                    evidence_prompt = (
                        "[|system|]당신은 대한민국 공무원 민원 답변 보강 전문가입니다. "
                        "법적 근거와 관련 규정을 정확하게 인용하여 evidence 섹션을 작성하세요."
                        "[|endofturn|]\n"
                        "[|user|]다음 민원 답변에 대해 법적 근거와 관련 규정을 보강하여 "
                        "evidence 섹션을 작성하세요.\n\n"
                        f"[기존 답변]\n{existing_response[:800]}\n\n"
                        f"[검색 결과]\n{rag_context[:800]}\n\n"
                        f"[API 조회 결과]\n{api_context[:800]}"
                        "[|endofturn|]\n[|assistant|]"
                    )

                    # Configure the legal adapter LoRA.
                    legal_adapter_path = runtime_config.model.adapter_paths.get("legal")
                    lora_req = None
                    if legal_adapter_path and LoRARequest is not None:
                        lora_req = LoRARequest("legal", _LORA_ID_MAP["legal"], legal_adapter_path)

                    if SamplingParams is not None:
                        sp = SamplingParams(
                            max_tokens=512,
                            temperature=0.5,
                            top_p=0.9,
                            stop=["[|endofturn|]"],
                        )
                        request_id = str(uuid.uuid4())
                        output = await engine_ref._run_engine(
                            evidence_prompt, sp, request_id, lora_request=lora_req
                        )
                        if output is not None and output.outputs:
                            enhanced_text = engine_ref._strip_thought_blocks(output.outputs[0].text)
                except Exception as exc:
                    logger.warning(f"Evidence LLM 보강 실패, fallback 사용: {exc}")
                    enhanced_text = fallback_text

            return {
                "text": enhanced_text,
                "rag_results": rag_data.get("results", []),
                "api_citations": api_data.get("citations", []),
            }

        tool_registry = {
            ToolType.RAG_SEARCH: _rag_search_tool,
            ToolType.API_LOOKUP: _api_lookup_tool,
            ToolType.DRAFT_CIVIL_RESPONSE: _draft_civil_response_tool,
            ToolType.APPEND_EVIDENCE: _append_evidence_tool,
        }
        self.agent_loop = AgentLoop(tool_registry=tool_registry)
    def _build_tool_registry(self) -> Dict[str, Any]:
        """Return the CapabilityBase-based MVP tool registry.

        Uses build_mvp_registry() so the registry comes from a single source:
        planner metadata and executor bindings originate from the same
        CapabilityBase instances. AgentLoop backward compatibility: existing
        closures are extracted from AgentLoop._tools and wrapped.
        """
        if self.agent_loop is None:
            return {}

        from src.inference.graph.capabilities.registry import build_mvp_registry

        # Extract the existing closures from the AgentLoop's tool registry.
        raw_tools = {
            str(k.value if hasattr(k, "value") else k): v for k, v in self.agent_loop._tools.items()
        }

        return build_mvp_registry(
            rag_search_fn=raw_tools.get("rag_search", _noop_tool),
            api_lookup_action=self._get_api_lookup_action(),
            draft_civil_response_fn=raw_tools.get("draft_civil_response", _noop_tool),
            append_evidence_fn=raw_tools.get("append_evidence", _noop_tool),
        )

    def _get_api_lookup_action(self) -> Any:
        """Extract the MinwonAnalysisAction behind the api_lookup tool registered on the AgentLoop."""
        if self.agent_loop is None:
            return None
        tool_fn = self.agent_loop._tools.get(ToolType.API_LOOKUP)
        # If it is an ApiLookupCapability, extract the action directly.
        if hasattr(tool_fn, "_action"):
            return tool_fn._action
        # A plain closure does not expose its action, so create a fresh
        # MinwonAnalysisAction instead (as _init_agent_loop does); return None on failure.
        try:
            from src.inference.actions.data_go_kr import MinwonAnalysisAction

            return MinwonAnalysisAction()
        except Exception:
            return None

    def _init_graph_with_async_checkpointer(self, checkpointer: object) -> None:
        """Rebuild the graph once the lifespan hook has an AsyncSqliteSaver ready."""
        self._init_graph(checkpointer=checkpointer)

    def _init_graph(self, checkpointer: Optional[object] = None) -> None:
        """Initialize the LangGraph StateGraph.

        In production the LLMPlannerAdapter (vLLM OpenAI-compatible endpoint)
        is used. With SKIP_MODEL_LOAD=true (CI/tests) the RegexPlannerAdapter
        serves as the CI fallback. RegistryExecutorAdapter reuses the existing
        tool_registry.

        Parameters
        ----------
        checkpointer : optional
            LangGraph checkpointer injected from outside.
            When None, a SqliteSaver (synchronous sqlite3) is attempted,
            falling back to MemorySaver if the import fails.
            The SqliteSaver DB is created as ``langgraph_checkpoints.db`` in
            the same directory as the SessionStore DB (separation of concerns).
        """
        try:
            from src.inference.graph.builder import build_govon_graph
            from src.inference.graph.executor_adapter import RegistryExecutorAdapter
            from src.inference.graph.planner_adapter import LLMPlannerAdapter
        except ImportError as exc:
            logger.warning(f"LangGraph graph 초기화 실패 (import 오류): {exc}")
            return

        tool_registry = self._build_tool_registry()

        if SKIP_MODEL_LOAD:
            # CI/test environment: no LLM, so use RegexPlannerAdapter as the CI fallback.
            from src.inference.graph.planner_adapter import RegexPlannerAdapter

            planner = RegexPlannerAdapter(registry=tool_registry)
        else:
            # Production: wire the vLLM OpenAI-compatible endpoint into LLMPlannerAdapter.
            # NOTE: ChatOpenAI connects lazily, so constructing it is safe even if vLLM
            # has not started yet. The actual LLM call happens at graph-invoke time,
            # by which point the lifespan hook has already started vLLM.
            from langchain_openai import ChatOpenAI

            planner_base_url = os.getenv(
                "LANGGRAPH_MODEL_BASE_URL",
                f"http://127.0.0.1:{runtime_config.port}/v1",
            )
            planner_api_key = os.getenv("LANGGRAPH_MODEL_API_KEY", "EMPTY")
            planner_model = os.getenv("LANGGRAPH_PLANNER_MODEL", runtime_config.model.model_path)
            llm = ChatOpenAI(
                base_url=planner_base_url,
                api_key=planner_api_key,
                model=planner_model,
                temperature=0.0,
            )
            planner = LLMPlannerAdapter(llm=llm, registry=tool_registry)
        executor = RegistryExecutorAdapter(
            tool_registry=tool_registry,
            session_store=self.session_store,
        )

        # When no checkpointer is injected from outside, try SqliteSaver.
        # Unlike MemorySaver, SqliteSaver restores interrupt state after a
        # process restart, so it is restart-safe.
        if checkpointer is None:
            checkpointer, conn = _build_sync_sqlite_checkpointer(self.session_store.db_path)
            # Close any previous synchronous connection to prevent a leak.
            if self._sync_checkpointer_conn is not None:
                try:
                    self._sync_checkpointer_conn.close()
                except Exception:
                    pass
            self._sync_checkpointer_conn = conn

        self.graph = build_govon_graph(
            planner_adapter=planner,
            executor_adapter=executor,
            session_store=self.session_store,
            checkpointer=checkpointer,
        )
        logger.info("LangGraph graph 초기화 완료")


def _build_sync_sqlite_checkpointer(
    session_db_path: str,
) -> tuple:
    """Return a SqliteSaver (synchronous) or a MemorySaver fallback.

    The SQLite DB for the LangGraph checkpointer is created as a separate
    file, ``langgraph_checkpoints.db``, in the same directory as the
    SessionStore's sessions.sqlite3. Splitting the two DBs keeps the concerns
    (session metadata vs. graph checkpoints) clearly separated.

    Unlike MemorySaver, SqliteSaver restores interrupt state from SQLite after
    a process restart, so it is restart-safe.

    Parameters
    ----------
    session_db_path : str
        Path to the sessions.sqlite3 file the SessionStore is using.
        langgraph_checkpoints.db is created in this path's parent directory.

    Returns
    -------
    tuple[SqliteSaver | MemorySaver, sqlite3.Connection | None]
        A (checkpointer, conn) tuple. With SqliteSaver, conn is the open
        sqlite3.Connection and the caller must close it at an appropriate
        time. With the MemorySaver fallback, conn is None.
    """
    cp_db_path = str(Path(session_db_path).parent / "langgraph_checkpoints.db")
    try:
        from langgraph.checkpoint.sqlite import SqliteSaver

        conn = __import__("sqlite3").connect(cp_db_path, check_same_thread=False)
        saver = SqliteSaver(conn)
        logger.info(f"LangGraph checkpointer: SqliteSaver ({cp_db_path})")
        return saver, conn
    except ImportError:
        logger.warning(
            "langgraph-checkpoint-sqlite 미설치 → MemorySaver로 fallback합니다. "
            "프로세스 재시작 시 interrupt 상태가 소멸됩니다."
        )
        from langgraph.checkpoint.memory import MemorySaver

        return MemorySaver(), None
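# --- Illustrative aside (not part of api_server.py) --------------------------
# Typical call-site contract for the helper above, mirroring _init_graph: the
# caller owns the sqlite3 connection and must close it when replacing the
# checkpointer. The path below is a hypothetical example value.
def _example_checkpointer_usage():
    checkpointer, conn = _build_sync_sqlite_checkpointer("/tmp/govon/sessions.sqlite3")
    try:
        pass  # build the graph with `checkpointer` here
    finally:
        if conn is not None:  # None when the MemorySaver fallback was used
            conn.close()
# ------------------------------------------------------------------------------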
manager = vLLMEngineManager()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: initialize the model/indexes and upgrade to AsyncSqliteSaver.

    During startup the graph is rebuilt when AsyncSqliteSaver is available.
    The AsyncSqliteSaver is managed as an async context manager and cleaned up
    on shutdown. If the AsyncSqliteSaver import fails, the SqliteSaver (or
    MemorySaver fallback) already configured by _init_graph is kept as-is.
    """
    await manager.initialize()

    # Try rebuilding the graph with AsyncSqliteSaver (better async performance).
    async_cp_db = str(Path(manager.session_store.db_path).parent / "langgraph_checkpoints.db")
    try:
        from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver

        async with AsyncSqliteSaver.from_conn_string(async_cp_db) as async_saver:
            # Close the connection held by the synchronous SqliteSaver to prevent a leak.
            if manager._sync_checkpointer_conn is not None:
                try:
                    manager._sync_checkpointer_conn.close()
                except Exception:
                    pass
                manager._sync_checkpointer_conn = None
            manager._checkpointer_ctx = async_saver
            manager._init_graph_with_async_checkpointer(async_saver)
            logger.info(f"LangGraph checkpointer: AsyncSqliteSaver ({async_cp_db})")
            yield
            manager._checkpointer_ctx = None
    except ImportError:
        logger.info("AsyncSqliteSaver 미설치 → SqliteSaver(동기) 또는 MemorySaver로 실행합니다.")
        yield
app = FastAPI(
    title="GovOn Local Runtime",
    description="Local FastAPI daemon for the GovOn Agentic Shell MVP.",
    lifespan=lifespan,
)

ALLOWED_ORIGINS = os.getenv("CORS_ORIGINS", "").split(",")
if ALLOWED_ORIGINS and ALLOWED_ORIGINS[0]:
    app.add_middleware(
        CORSMiddleware,
        allow_origins=ALLOWED_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

if _RATE_LIMIT_AVAILABLE and limiter is not None:
    app.state.limiter = limiter
    app.add_middleware(SlowAPIMiddleware)


@app.get("/health")
async def health():
    index_summary = None
    if manager.index_manager:
        stats = manager.index_manager.get_index_stats()
        index_summary = {
            idx_type: {
                "loaded": info.get("loaded", False),
                "doc_count": info.get("doc_count", 0),
            }
            for idx_type, info in stats.get("indexes", {}).items()
        }

    bm25_summary = {}
    for idx_type in IndexType:
        indexer = manager.bm25_indexers.get(idx_type)
        if indexer and indexer.is_ready():
            bm25_summary[idx_type.value] = {"loaded": True, "doc_count": indexer.doc_count}
        else:
            bm25_summary[idx_type.value] = {"loaded": False}

    return {
        "status": "healthy",
        "profile": runtime_config.profile.value,
        "model": runtime_config.model.model_path,
        "rag_enabled": manager.index_manager is not None or manager.retriever is not None,
        "agents_loaded": manager.agent_manager.list_agents() if manager.agent_manager else [],
        "indexes": index_summary,
        "bm25_indexes": bm25_summary,
        "hybrid_search_enabled": manager.hybrid_engine is not None,
        "local_documents": {
            "enabled": bool(runtime_config.paths.local_docs_root),
            "root_dir": runtime_config.paths.local_docs_root or None,
            "last_sync": manager.local_document_sync_status,
        },
        "feature_flags": {
            "use_rag_pipeline": manager.feature_flags.use_rag_pipeline,
            "model_version": manager.feature_flags.model_version,
        },
        "session_store": {
            "driver": "sqlite",
            "path": manager.session_store.db_path,
        },
    }
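# --- Illustrative aside (not part of api_server.py) --------------------------
# Probing the daemon's /health endpoint; host and port are hypothetical. The
# keys read below are among the ones the handler above returns.
def _example_health_probe():
    import urllib.request

    with urllib.request.urlopen("http://127.0.0.1:8000/health") as resp:
        health_payload = json.loads(resp.read())
    return (
        health_payload["status"],
        health_payload["rag_enabled"],
        health_payload["hybrid_search_enabled"],
    )
# ------------------------------------------------------------------------------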
def _rate_limit(limit_string: str):
    if _RATE_LIMIT_AVAILABLE and limiter is not None:
        return limiter.limit(limit_string)

    def _noop(func):
        return func

    return _noop


def get_feature_flags(request: Request) -> FeatureFlags:
    header = request.headers.get("X-Feature-Flag")
    return manager.feature_flags.override_from_header(header)
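# --- Illustrative aside (not part of api_server.py) --------------------------
# Per-request flag overrides ride on the X-Feature-Flag header and are parsed
# by FeatureFlags.override_from_header (defined in feature_flags.py). The
# header value format below is a hypothetical example, not the confirmed
# grammar.
_EXAMPLE_OVERRIDE_HEADERS = {
    "X-API-Key": "my-secret-key",                # hypothetical key
    "X-Feature-Flag": "use_rag_pipeline=false",  # hypothetical value format
}
# ------------------------------------------------------------------------------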
@app.post("/v1/generate-civil-response", response_model=GenerateCivilResponseResponse)
@_rate_limit("30/minute")
async def generate_civil_response(
    request: GenerateCivilResponseRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if request.stream:
        raise HTTPException(status_code=400, detail="민원 답변 스트리밍은 /v1/stream을 사용하세요.")

    request_id = str(uuid.uuid4())
    final_output, retrieved_cases, search_results = await manager.generate_civil_response(
        request,
        request_id,
        flags,
    )
    if final_output is None:
        raise HTTPException(status_code=500, detail="민원 답변 생성에 실패했습니다.")

    return GenerateCivilResponseResponse(
        request_id=request_id,
        complaint_id=request.complaint_id,
        text=manager._strip_thought_blocks(final_output.outputs[0].text),
        prompt_tokens=len(final_output.prompt_token_ids),
        completion_tokens=len(final_output.outputs[0].token_ids),
        retrieved_cases=[RetrievedCase(**case) for case in retrieved_cases],
        search_results=search_results,
    )


@app.post("/v1/generate", response_model=GenerateResponse)
@_rate_limit("30/minute")
async def generate(
    request: GenerateRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if request.stream:
        raise HTTPException(status_code=400, detail="Use /v1/stream for streaming.")

    request_id = str(uuid.uuid4())
    final_output, retrieved_cases = await manager.generate(request, request_id, flags)
    if final_output is None:
        raise HTTPException(status_code=500, detail="Generation failed.")

    return GenerateResponse(
        request_id=request_id,
        complaint_id=request.complaint_id,
        text=manager._strip_thought_blocks(final_output.outputs[0].text),
        prompt_tokens=len(final_output.prompt_token_ids),
        completion_tokens=len(final_output.outputs[0].token_ids),
        retrieved_cases=[RetrievedCase(**case) for case in retrieved_cases],
    )


@app.post("/v1/stream")
@_rate_limit("30/minute")
async def stream_generate(
    request: GenerateRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if not request.stream:
        request.stream = True

    request_id = str(uuid.uuid4())
    results_stream, retrieved_cases, search_results = await manager.generate_stream(
        request,
        request_id,
        flags,
    )

    async def stream_results() -> AsyncGenerator[str, None]:
        cases_data = [RetrievedCase(**case).model_dump() for case in retrieved_cases]
        search_data = [result.model_dump() for result in search_results]

        async for request_output in results_stream:
            text = request_output.outputs[0].text
            finished = request_output.finished
            if finished:
                text = manager._strip_thought_blocks(text)

            response_obj = {"request_id": request_id, "text": text, "finished": finished}
            if finished:
                response_obj["retrieved_cases"] = cases_data
                response_obj["search_results"] = search_data

            yield f"data: {json.dumps(response_obj, ensure_ascii=False)}\n\n"

    return StreamingResponse(stream_results(), media_type="text/event-stream")
| 1415 |
+
|
| 1416 |
+
@app.post("/v1/search", response_model=SearchResponse)
|
| 1417 |
+
@app.post("/search", response_model=SearchResponse)
|
| 1418 |
+
@_rate_limit("60/minute")
|
| 1419 |
+
async def search(request: SearchRequest, _: Request, __: None = Depends(verify_api_key)):
|
| 1420 |
+
start_time = time.monotonic()
|
| 1421 |
+
try:
|
| 1422 |
+
if manager.hybrid_engine:
|
| 1423 |
+
results_raw, actual_mode = await manager.hybrid_engine.search(
|
| 1424 |
+
query=request.query,
|
| 1425 |
+
index_type=request.doc_type,
|
| 1426 |
+
top_k=request.top_k,
|
| 1427 |
+
mode=request.search_mode,
|
| 1428 |
+
)
|
| 1429 |
+
results = [
|
| 1430 |
+
SearchResult(
|
| 1431 |
+
doc_id=result.get("doc_id", ""),
|
| 1432 |
+
source_type=IndexType(result.get("doc_type", request.doc_type.value)),
|
| 1433 |
+
title=result.get("title", ""),
|
| 1434 |
+
content=_extract_content_by_type(result, request.doc_type),
|
| 1435 |
+
score=result.get("score", 0.0),
|
| 1436 |
+
reliability_score=result.get("reliability_score", 1.0),
|
| 1437 |
+
metadata=result.get("extras", {}),
|
| 1438 |
+
chunk_index=result.get("chunk_index", 0),
|
| 1439 |
+
total_chunks=result.get("chunk_total", 1),
|
| 1440 |
+
)
|
| 1441 |
+
for result in results_raw
|
| 1442 |
+
]
|
| 1443 |
+
elif manager.retriever:
|
| 1444 |
+
raw_results = manager.retriever.search(request.query, top_k=request.top_k)
|
| 1445 |
+
results = [
|
| 1446 |
+
SearchResult(
|
| 1447 |
+
doc_id=raw.get("id", raw.get("doc_id", "")),
|
| 1448 |
+
source_type=request.doc_type,
|
| 1449 |
+
title=raw.get("category", ""),
|
| 1450 |
+
content=raw.get("complaint", "") + "\n" + raw.get("answer", ""),
|
| 1451 |
+
score=raw.get("score", 0.0),
|
| 1452 |
+
reliability_score=raw.get("reliability_score", 1.0),
|
| 1453 |
+
)
|
| 1454 |
+
for raw in raw_results
|
| 1455 |
+
]
|
| 1456 |
+
actual_mode = SearchMode.DENSE
|
| 1457 |
+
else:
|
| 1458 |
+
raise HTTPException(status_code=503, detail="๊ฒ์ ์์ง์ด ์์ง ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1459 |
+
|
| 1460 |
+
elapsed_ms = (time.monotonic() - start_time) * 1000
|
| 1461 |
+
actual_search_mode = actual_mode if actual_mode != request.search_mode else None
|
| 1462 |
+
return SearchResponse(
|
| 1463 |
+
query=request.query,
|
| 1464 |
+
doc_type=request.doc_type,
|
| 1465 |
+
search_mode=request.search_mode,
|
| 1466 |
+
actual_search_mode=actual_search_mode,
|
| 1467 |
+
results=results,
|
| 1468 |
+
total=len(results),
|
| 1469 |
+
search_time_ms=round(elapsed_ms, 2),
|
| 1470 |
+
)
|
| 1471 |
+
except HTTPException:
|
| 1472 |
+
raise
|
| 1473 |
+
except Exception as exc:
|
| 1474 |
+
logger.error(f"๊ฒ์ ์ค ์ค๋ฅ ๋ฐ์: {exc}", exc_info=True)
|
| 1475 |
+
raise HTTPException(status_code=500, detail="๊ฒ์ ์ฒ๋ฆฌ ์ค ๋ด๋ถ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.")
|
| 1476 |
+
|
| 1477 |
+
|
| 1478 |
+
def _trace_to_schema(trace: AgentTrace) -> AgentTraceSchema:
|
| 1479 |
+
return AgentTraceSchema(
|
| 1480 |
+
request_id=trace.request_id,
|
| 1481 |
+
session_id=trace.session_id,
|
| 1482 |
+
plan=trace.plan.tool_names if trace.plan else [],
|
| 1483 |
+
plan_reason=trace.plan.reason if trace.plan else "",
|
| 1484 |
+
tool_results=[
|
| 1485 |
+
ToolResultSchema(
|
| 1486 |
+
tool=tool_name(result.tool),
|
| 1487 |
+
success=result.success,
|
| 1488 |
+
latency_ms=round(result.latency_ms, 2),
|
| 1489 |
+
data=result.data,
|
| 1490 |
+
error=result.error,
|
| 1491 |
+
)
|
| 1492 |
+
for result in trace.tool_results
|
| 1493 |
+
],
|
| 1494 |
+
total_latency_ms=round(trace.total_latency_ms, 2),
|
| 1495 |
+
error=trace.error,
|
| 1496 |
+
)
|
| 1497 |
+
|
| 1498 |
+
|
| 1499 |
+
@app.post("/v1/agent/run", response_model=AgentRunResponse)
|
| 1500 |
+
@_rate_limit("30/minute")
|
| 1501 |
+
async def agent_run(
|
| 1502 |
+
request: AgentRunRequest,
|
| 1503 |
+
_: None = Depends(verify_api_key),
|
| 1504 |
+
):
|
| 1505 |
+
if not manager.agent_loop:
|
| 1506 |
+
raise HTTPException(status_code=503, detail="์์ด์ ํธ ๋ฃจํ๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1507 |
+
if request.stream:
|
| 1508 |
+
raise HTTPException(status_code=400, detail="์คํธ๋ฆฌ๋ฐ์ /v1/agent/stream์ ์ฌ์ฉํ์ธ์.")
|
| 1509 |
+
|
| 1510 |
+
session = manager.session_store.get_or_create(session_id=request.session_id)
|
| 1511 |
+
request_id = str(uuid.uuid4())
|
| 1512 |
+
trace = await manager.agent_loop.run(
|
| 1513 |
+
query=request.query,
|
| 1514 |
+
session=session,
|
| 1515 |
+
request_id=request_id,
|
| 1516 |
+
force_tools=request.force_tools,
|
| 1517 |
+
)
|
| 1518 |
+
|
| 1519 |
+
search_results = None
|
| 1520 |
+
for result in trace.tool_results:
|
| 1521 |
+
if tool_name(result.tool) == ToolType.RAG_SEARCH.value and result.success:
|
| 1522 |
+
search_results = result.data.get("results")
|
| 1523 |
+
elif (
|
| 1524 |
+
tool_name(result.tool) == ToolType.API_LOOKUP.value
|
| 1525 |
+
and result.success
|
| 1526 |
+
and not search_results
|
| 1527 |
+
):
|
| 1528 |
+
search_results = result.data.get("results")
|
| 1529 |
+
|
| 1530 |
+
return AgentRunResponse(
|
| 1531 |
+
request_id=request_id,
|
| 1532 |
+
session_id=session.session_id,
|
| 1533 |
+
text=trace.final_text,
|
| 1534 |
+
trace=_trace_to_schema(trace),
|
| 1535 |
+
search_results=search_results,
|
| 1536 |
+
)
|
| 1537 |
+
|
| 1538 |
+
|
| 1539 |
+
@app.post("/v1/agent/stream")
|
| 1540 |
+
@_rate_limit("30/minute")
|
| 1541 |
+
async def agent_stream(
|
| 1542 |
+
request: AgentRunRequest,
|
| 1543 |
+
_: None = Depends(verify_api_key),
|
| 1544 |
+
):
|
| 1545 |
+
if not manager.agent_loop:
|
| 1546 |
+
raise HTTPException(status_code=503, detail="์์ด์ ํธ ๋ฃจํ๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1547 |
+
|
| 1548 |
+
session = manager.session_store.get_or_create(session_id=request.session_id)
|
| 1549 |
+
request_id = str(uuid.uuid4())
|
| 1550 |
+
|
| 1551 |
+
async def stream_events() -> AsyncGenerator[str, None]:
|
| 1552 |
+
async for event in manager.agent_loop.run_stream(
|
| 1553 |
+
query=request.query,
|
| 1554 |
+
session=session,
|
| 1555 |
+
request_id=request_id,
|
| 1556 |
+
force_tools=request.force_tools,
|
| 1557 |
+
):
|
| 1558 |
+
yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
|
| 1559 |
+
|
| 1560 |
+
return StreamingResponse(stream_events(), media_type="text/event-stream")
|
| 1561 |
+
|
| 1562 |
+
|
| 1563 |
+
# ---------------------------------------------------------------------------
|
| 1564 |
+
# v2 ์๋ํฌ์ธํธ: LangGraph ๊ธฐ๋ฐ agent ์คํ (interrupt/approve ํจํด)
|
| 1565 |
+
# ---------------------------------------------------------------------------
|
| 1566 |
+
|
| 1567 |
+
|
| 1568 |
+
@app.post("/v2/agent/stream")
|
| 1569 |
+
async def v2_agent_stream(
|
| 1570 |
+
request: AgentRunRequest,
|
| 1571 |
+
_: None = Depends(verify_api_key),
|
| 1572 |
+
):
|
| 1573 |
+
"""LangGraph ๊ธฐ๋ฐ agent SSE ์คํธ๋ฆฌ๋ฐ ์คํ.
|
| 1574 |
+
|
| 1575 |
+
graph.astream()์ ์ฌ์ฉํด ๋
ธ๋๋ณ ์๋ฃ ์ด๋ฒคํธ๋ฅผ SSE๋ก ์ ์กํ๋ค.
|
| 1576 |
+
|
| 1577 |
+
์ด๋ฒคํธ ํ์ (๊ฐ ์ค: ``data: <JSON>\\n\\n``):
|
| 1578 |
+
- ๋
ธ๋ ์งํ: ``{"node": "<name>", "status": "completed", ...}``
|
| 1579 |
+
- approval_wait ๋๋ฌ:
|
| 1580 |
+
``{"node": "approval_wait", "status": "awaiting_approval",
|
| 1581 |
+
"approval_request": {...}, "thread_id": "..."}``
|
| 1582 |
+
- ์ค๋ฅ: ``{"node": "error", "status": "error", "error": "..."}``
|
| 1583 |
+
|
| 1584 |
+
์น์ธ ํ๋ฆ:
|
| 1585 |
+
- ํด๋ผ์ด์ธํธ๋ ``awaiting_approval`` ์ด๋ฒคํธ ์์ ํ ์คํธ๋ฆผ์ด ์ข
๋ฃ๋จ์ ์ธ์งํ๊ณ
|
| 1586 |
+
``/v2/agent/approve``๋ก ์น์ธ/๊ฑฐ์ ์ ์ ๋ฌํ๋ค.
|
| 1587 |
+
"""
|
| 1588 |
+
if not manager.graph:
|
| 1589 |
+
|
| 1590 |
+
async def _no_graph():
|
| 1591 |
+
yield 'data: {"node": "error", "status": "error", "error": "LangGraph graph๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค."}\n\n'
|
| 1592 |
+
|
| 1593 |
+
return StreamingResponse(_no_graph(), media_type="text/event-stream")
|
| 1594 |
+
|
| 1595 |
+
from langchain_core.messages import HumanMessage
|
| 1596 |
+
|
| 1597 |
+
thread_id = request.session_id or str(uuid.uuid4())
|
| 1598 |
+
session_id = thread_id
|
| 1599 |
+
request_id = str(uuid.uuid4())
|
| 1600 |
+
config = {"configurable": {"thread_id": thread_id}}
|
| 1601 |
+
initial_state = {
|
| 1602 |
+
"session_id": session_id,
|
| 1603 |
+
"request_id": request_id,
|
| 1604 |
+
"messages": [HumanMessage(content=request.query)],
|
| 1605 |
+
}
|
| 1606 |
+
|
| 1607 |
+
async def _generate() -> AsyncGenerator[str, None]:
|
| 1608 |
+
try:
|
| 1609 |
+
async for chunk in manager.graph.astream(initial_state, config, stream_mode="updates"):
|
| 1610 |
+
# chunk: {node_name: state_delta}
|
| 1611 |
+
for node_name, state_delta in chunk.items():
|
| 1612 |
+
event: dict = {
|
| 1613 |
+
"node": node_name,
|
| 1614 |
+
"status": "completed",
|
| 1615 |
+
}
|
| 1616 |
+
# synthesis ์๋ฃ ์ evidence_items์ task_type์ ์ด๋ฒคํธ์ ํฌํจ.
|
| 1617 |
+
# ์ ์ : stream_mode="updates"์์ state_delta๋ ๋
ธ๋์ raw return dict๋ค.
|
| 1618 |
+
# LangGraph ๋ฒ์ ์
๊ทธ๋ ์ด๋ ์ ์ด ๊ตฌ์กฐ๊ฐ ๋ณ๊ฒฝ๋ ์ ์์ผ๋ฏ๋ก ์ฃผ์.
|
| 1619 |
+
# evidence_items ์คํค๋ง: EvidenceItem.to_dict() ํ๋๋ฅผ ๋ฐ๋ฅธ๋ค.
|
| 1620 |
+
# source_type: "rag" | "api" | "llm_generated"
|
| 1621 |
+
# title, excerpt, link_or_path, page, score, provider_meta
|
| 1622 |
+
# (์น ํ๋ก ํธ์๋์์ ์ง์ ๋ ๋๋ง ์ XSS ๋ฐฉ์ง๋ฅผ ์ํด ์ด์ค์ผ์ดํ ํ์)
|
| 1623 |
+
if node_name == "synthesis" and isinstance(state_delta, dict):
|
| 1624 |
+
if state_delta.get("final_text"):
|
| 1625 |
+
event["final_text"] = state_delta["final_text"]
|
| 1626 |
+
if state_delta.get("evidence_items"):
|
| 1627 |
+
event["evidence_items"] = state_delta["evidence_items"]
|
| 1628 |
+
if state_delta.get("task_type"):
|
| 1629 |
+
event["task_type"] = state_delta["task_type"]
|
| 1630 |
+
if node_name == "approval_wait":
|
| 1631 |
+
try:
|
| 1632 |
+
graph_state = await manager.graph.aget_state(config)
|
| 1633 |
+
if graph_state.next:
|
| 1634 |
+
event = {
|
| 1635 |
+
"node": "approval_wait",
|
| 1636 |
+
"status": "awaiting_approval",
|
| 1637 |
+
"approval_request": _extract_approval_request(graph_state),
|
| 1638 |
+
"thread_id": thread_id,
|
| 1639 |
+
"session_id": session_id,
|
| 1640 |
+
}
|
| 1641 |
+
except Exception as exc:
|
| 1642 |
+
logger.warning(f"[v2/agent/stream] aget_state ์คํจ: {exc}")
|
| 1643 |
+
event["status"] = "awaiting_approval"
|
| 1644 |
+
event["approval_request"] = {
|
| 1645 |
+
"prompt": "์น์ธ ์ ๋ณด๋ฅผ ๋ถ๋ฌ์ฌ ์ ์์ต๋๋ค. /v2/agent/approve๋ก ์งํํ์ธ์."
|
| 1646 |
+
}
|
| 1647 |
+
|
| 1648 |
+
yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
|
| 1649 |
+
|
| 1650 |
+
# Stop streaming after awaiting_approval (client must call /v2/agent/approve)
|
| 1651 |
+
if event.get("status") == "awaiting_approval":
|
| 1652 |
+
return
|
| 1653 |
+
except Exception as exc:
|
| 1654 |
+
logger.error(f"[v2/agent/stream] ์คํธ๋ฆผ ์์ธ: {exc}")
|
| 1655 |
+
error_event = {"node": "error", "status": "error", "error": str(exc)}
|
| 1656 |
+
yield f"data: {json.dumps(error_event, ensure_ascii=False)}\n\n"
|
| 1657 |
+
|
| 1658 |
+
return StreamingResponse(_generate(), media_type="text/event-stream")
|
| 1659 |
+
|
| 1660 |
+
|
| 1661 |
+
@app.post("/v2/agent/run")
|
| 1662 |
+
async def v2_agent_run(
|
| 1663 |
+
request: AgentRunRequest,
|
| 1664 |
+
_: None = Depends(verify_api_key),
|
| 1665 |
+
):
|
| 1666 |
+
"""LangGraph ๊ธฐ๋ฐ agent ์คํ (1๋จ๊ณ: interrupt๊น์ง).
|
| 1667 |
+
|
| 1668 |
+
graph๋ฅผ ์คํํ์ฌ `approval_wait` ๋
ธ๋์์ interrupt๋๋ฉด
|
| 1669 |
+
`status: awaiting_approval`๊ณผ ํจ๊ป ์น์ธ ์์ฒญ ์ ๋ณด๋ฅผ ๋ฐํํ๋ค.
|
| 1670 |
+
|
| 1671 |
+
ํด๋ผ์ด์ธํธ๋ ๋ฐํ๋ `thread_id`๋ฅผ ์ ์ฅํด๋๊ณ
|
| 1672 |
+
`/v2/agent/approve`๋ก ์น์ธ/๊ฑฐ์ ์ ์ ๋ฌํด์ผ ํ๋ค.
|
| 1673 |
+
|
| 1674 |
+
Session Resume Contract
|
| 1675 |
+
-----------------------
|
| 1676 |
+
๋์ผ session_id๋ก ์ฌ์์ฒญํ๋ ๊ฒฝ์ฐ ๋ค์ ๊ท์น์ ๋ฐ๋ฅธ๋ค:
|
| 1677 |
+
|
| 1678 |
+
1. **interrupt ๋๊ธฐ ์ค**: graph๊ฐ approval_wait์์ interrupt ์ํ์ด๋ฉด
|
| 1679 |
+
ํ์ฌ checkpoint์์ resumeํ์ง ์๊ณ ์ ๋ฉ์์ง๋ฅผ *์ถ๊ฐํ์ฌ* ์ด์ด์ ์คํํ๋ค.
|
| 1680 |
+
(์ฌ์์ฒญ์ ์ graph_run์ผ๋ก ์ฒ๋ฆฌํ๋ค.)
|
| 1681 |
+
์น์ธ/๊ฑฐ์ ์ ๋ฐ๋์ `/v2/agent/approve`๋ฅผ ํตํด ์ฒ๋ฆฌํด์ผ ํ๋ค.
|
| 1682 |
+
|
| 1683 |
+
2. **์๋ฃ๋ graph**: graph๊ฐ END์ ๋๋ฌํ ์ํ(state.next == [])์ด๋ฉด
|
| 1684 |
+
๋์ผ thread_id์ ์ graph_run์ ์์ํ๋ค. LangGraph checkpointer๊ฐ
|
| 1685 |
+
๋์ผ thread_id์์ ์ด์ ์ํ๋ฅผ ๋์ ํ๋ฏ๋ก ๋ํ ํ์คํ ๋ฆฌ๊ฐ ๋ณด์กด๋๋ค.
|
| 1686 |
+
|
| 1687 |
+
3. **ํ๋ก์ธ์ค ์ฌ์์ ํ**: SqliteSaver ์ฌ์ฉ ์ DB์์ checkpoint๊ฐ ๋ณต์๋๋ฏ๋ก
|
| 1688 |
+
interrupt ์ํ๊ฐ ์ ์ง๋๋ค. ํด๋ผ์ด์ธํธ๋ ๊ธฐ์กด thread_id๋ก `/v2/agent/approve`
|
| 1689 |
+
๋ฅผ ๋ค์ ํธ์ถํ๋ฉด ์ค๋จ๋ ์ง์ ์์ resumeํ ์ ์๋ค.
|
| 1690 |
+
|
| 1691 |
+
Note: session_id == thread_id. ๋ ๊ฐ์ ํญ์ ๋์ผํ๊ฒ ์ ์ง๋๋ค.
|
| 1692 |
+
"""
|
| 1693 |
+
if not manager.graph:
|
| 1694 |
+
raise HTTPException(status_code=503, detail="LangGraph graph๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1695 |
+
|
| 1696 |
+
from langchain_core.messages import HumanMessage
|
| 1697 |
+
|
| 1698 |
+
thread_id = request.session_id or str(uuid.uuid4())
|
| 1699 |
+
session_id = thread_id # thread_id๋ฅผ session_id๋ก ํ์ (session_id == thread_id ๋ถ๋ณ)
|
| 1700 |
+
request_id = str(uuid.uuid4())
|
| 1701 |
+
config = {"configurable": {"thread_id": thread_id}}
|
| 1702 |
+
initial_state = {
|
| 1703 |
+
"session_id": session_id,
|
| 1704 |
+
"request_id": request_id,
|
| 1705 |
+
"messages": [HumanMessage(content=request.query)],
|
| 1706 |
+
}
|
| 1707 |
+
|
| 1708 |
+
try:
|
| 1709 |
+
await manager.graph.ainvoke(initial_state, config)
|
| 1710 |
+
|
| 1711 |
+
# interrupt ์ํ ํ์ธ
|
| 1712 |
+
graph_state = await manager.graph.aget_state(config)
|
| 1713 |
+
if graph_state.next:
|
| 1714 |
+
# interrupt ๋๊ธฐ ์ค: approval_request ์ ๋ณด๋ฅผ ํด๋ผ์ด์ธํธ์ ๋ฐํ
|
| 1715 |
+
return {
|
| 1716 |
+
"status": "awaiting_approval",
|
| 1717 |
+
"thread_id": thread_id,
|
| 1718 |
+
"session_id": session_id,
|
| 1719 |
+
"graph_run_id": request_id,
|
| 1720 |
+
"approval_request": _extract_approval_request(graph_state),
|
| 1721 |
+
}
|
| 1722 |
+
|
| 1723 |
+
# interrupt ์์ด ์๋ฃ๋ ๊ฒฝ์ฐ (rejected ๋๋ ์ค๋ฅ)
|
| 1724 |
+
final_state = graph_state.values
|
| 1725 |
+
return {
|
| 1726 |
+
"status": "completed",
|
| 1727 |
+
"thread_id": thread_id,
|
| 1728 |
+
"session_id": session_id,
|
| 1729 |
+
"graph_run_id": request_id,
|
| 1730 |
+
"text": final_state.get("final_text", ""),
|
| 1731 |
+
"evidence_items": final_state.get("evidence_items", []),
|
| 1732 |
+
"task_type": final_state.get("task_type", ""),
|
| 1733 |
+
}
|
| 1734 |
+
except Exception as exc:
|
| 1735 |
+
logger.error(f"[v2/agent/run] ์์ธ ๋ฐ์: {exc}")
|
| 1736 |
+
# graph_run์ "error" status๋ก ๊ธฐ๋ก ์๋
|
| 1737 |
+
try:
|
| 1738 |
+
if manager.session_store:
|
| 1739 |
+
session = manager.session_store.get_or_create(session_id)
|
| 1740 |
+
session.add_graph_run(
|
| 1741 |
+
request_id=request_id,
|
| 1742 |
+
plan_summary=f"[error] {exc}",
|
| 1743 |
+
approval_status="",
|
| 1744 |
+
executed_capabilities=[],
|
| 1745 |
+
status="error",
|
| 1746 |
+
total_latency_ms=0.0,
|
| 1747 |
+
)
|
| 1748 |
+
except Exception as persist_exc:
|
| 1749 |
+
logger.warning(f"[v2/agent/run] error persist ์คํจ: {persist_exc}")
|
| 1750 |
+
return {
|
| 1751 |
+
"status": "error",
|
| 1752 |
+
"thread_id": thread_id,
|
| 1753 |
+
"session_id": session_id,
|
| 1754 |
+
"graph_run_id": request_id,
|
| 1755 |
+
"error": str(exc),
|
| 1756 |
+
}
|
| 1757 |
+
|
| 1758 |
+
|
| 1759 |
+
@app.post("/v2/agent/approve")
|
| 1760 |
+
async def v2_agent_approve(
|
| 1761 |
+
thread_id: str,
|
| 1762 |
+
approved: bool,
|
| 1763 |
+
_: None = Depends(verify_api_key),
|
| 1764 |
+
):
|
| 1765 |
+
"""interrupt๋ graph๋ฅผ resumeํ๋ค (2๋จ๊ณ: ์น์ธ/๊ฑฐ์ ).
|
| 1766 |
+
|
| 1767 |
+
Parameters
|
| 1768 |
+
----------
|
| 1769 |
+
thread_id : str
|
| 1770 |
+
`/v2/agent/run`์์ ๋ฐํ๋ thread_id.
|
| 1771 |
+
approved : bool
|
| 1772 |
+
True๋ฉด tool_execute๋ก ์งํ, False๋ฉด graph๊ฐ END๋ก ์ข
๋ฃ.
|
| 1773 |
+
"""
|
| 1774 |
+
if not manager.graph:
|
| 1775 |
+
raise HTTPException(status_code=503, detail="LangGraph graph๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1776 |
+
|
| 1777 |
+
from langgraph.types import Command
|
| 1778 |
+
|
| 1779 |
+
config = {"configurable": {"thread_id": thread_id}}
|
| 1780 |
+
|
| 1781 |
+
try:
|
| 1782 |
+
result = await manager.graph.ainvoke(
|
| 1783 |
+
Command(resume={"approved": approved}),
|
| 1784 |
+
config,
|
| 1785 |
+
)
|
| 1786 |
+
|
| 1787 |
+
# ๊ฑฐ์ ์ด๋ฉด "rejected", ์น์ธ ์๋ฃ๋ฉด "completed"
|
| 1788 |
+
approval_status = result.get("approval_status", "")
|
| 1789 |
+
if not approved:
|
| 1790 |
+
response_status = "rejected"
|
| 1791 |
+
else:
|
| 1792 |
+
response_status = "completed"
|
| 1793 |
+
|
| 1794 |
+
return {
|
| 1795 |
+
"status": response_status,
|
| 1796 |
+
"thread_id": thread_id,
|
| 1797 |
+
"session_id": result.get("session_id", ""),
|
| 1798 |
+
"graph_run_id": result.get("request_id", ""),
|
| 1799 |
+
"text": result.get("final_text", ""),
|
| 1800 |
+
"evidence_items": result.get("evidence_items", []),
|
| 1801 |
+
"task_type": result.get("task_type", ""),
|
| 1802 |
+
"tool_results": result.get("tool_results", {}),
|
| 1803 |
+
"approval_status": approval_status,
|
| 1804 |
+
}
|
| 1805 |
+
except Exception as exc:
|
| 1806 |
+
logger.error(f"[v2/agent/approve] ์์ธ ๋ฐ์: {exc}")
|
| 1807 |
+
# graph_run์ "error" status๋ก ๊ธฐ๋ก ์๋
|
| 1808 |
+
session_id = ""
|
| 1809 |
+
request_id = ""
|
| 1810 |
+
try:
|
| 1811 |
+
if manager.session_store:
|
| 1812 |
+
graph_state = await manager.graph.aget_state(config)
|
| 1813 |
+
state_values = graph_state.values if graph_state else {}
|
| 1814 |
+
session_id = state_values.get("session_id", "")
|
| 1815 |
+
request_id = state_values.get("request_id", "")
|
| 1816 |
+
if session_id:
|
| 1817 |
+
session = manager.session_store.get_or_create(session_id)
|
| 1818 |
+
session.add_graph_run(
|
| 1819 |
+
request_id=request_id,
|
| 1820 |
+
plan_summary=f"[error] {exc}",
|
| 1821 |
+
approval_status="",
|
| 1822 |
+
executed_capabilities=[],
|
| 1823 |
+
status="error",
|
| 1824 |
+
total_latency_ms=0.0,
|
| 1825 |
+
)
|
| 1826 |
+
except Exception as persist_exc:
|
| 1827 |
+
logger.warning(f"[v2/agent/approve] error persist ์คํจ: {persist_exc}")
|
| 1828 |
+
return {
|
| 1829 |
+
"status": "error",
|
| 1830 |
+
"thread_id": thread_id,
|
| 1831 |
+
"session_id": session_id,
|
| 1832 |
+
"graph_run_id": request_id,
|
| 1833 |
+
"error": str(exc),
|
| 1834 |
+
}
|
| 1835 |
+
|
| 1836 |
+
|
| 1837 |
+
@app.post("/v2/agent/cancel")
|
| 1838 |
+
async def v2_agent_cancel(
|
| 1839 |
+
thread_id: str,
|
| 1840 |
+
_: None = Depends(verify_api_key),
|
| 1841 |
+
):
|
| 1842 |
+
"""interrupt ๋๊ธฐ ์ค์ธ graph๋ฅผ ๊ฐ์ ์ทจ์ํ๋ค.
|
| 1843 |
+
|
| 1844 |
+
interrupt ์ํ์์ ๊ฑฐ์ ์ฒ๋ฆฌ(Command(resume={"approved": False}))๋ฅผ ์ํํ๋,
|
| 1845 |
+
state์ interrupt_reason="user_cancel"์ ์ ๋ฌํ์ฌ
|
| 1846 |
+
persist ๋
ธ๋๊ฐ graph_run status๋ฅผ "interrupted"๋ก ๊ธฐ๋กํ๊ฒ ํ๋ค.
|
| 1847 |
+
|
| 1848 |
+
Parameters
|
| 1849 |
+
----------
|
| 1850 |
+
thread_id : str
|
| 1851 |
+
`/v2/agent/run`์์ ๋ฐํ๋ thread_id.
|
| 1852 |
+
"""
|
| 1853 |
+
if not manager.graph:
|
| 1854 |
+
raise HTTPException(status_code=503, detail="LangGraph graph๊ฐ ์ด๊ธฐํ๋์ง ์์์ต๋๋ค.")
|
| 1855 |
+
|
| 1856 |
+
from langgraph.types import Command
|
| 1857 |
+
|
| 1858 |
+
config = {"configurable": {"thread_id": thread_id}}
|
| 1859 |
+
|
| 1860 |
+
try:
|
| 1861 |
+
# interrupt ์ํ ํ์ธ
|
| 1862 |
+
graph_state = await manager.graph.aget_state(config)
|
| 1863 |
+
if not graph_state or not graph_state.next:
|
| 1864 |
+
raise HTTPException(
|
| 1865 |
+
status_code=409,
|
| 1866 |
+
detail="ํด๋น thread๋ ํ์ฌ interrupt ๋๊ธฐ ์ํ๊ฐ ์๋๋๋ค.",
|
| 1867 |
+
)
|
| 1868 |
+
|
| 1869 |
+
session_id = graph_state.values.get("session_id", "")
|
| 1870 |
+
|
| 1871 |
+
# ๊ฐ์ ๊ฑฐ์ + interrupt_reason ์ ๋ฌ๋ก resume
|
| 1872 |
+
result = await manager.graph.ainvoke(
|
| 1873 |
+
Command(resume={"approved": False, "cancel": True}),
|
| 1874 |
+
config,
|
| 1875 |
+
)
|
| 1876 |
+
|
| 1877 |
+
# persist ๋
ธ๋์์ "interrupted" ๊ธฐ๋ก์ ์ํด state update
|
| 1878 |
+
# (approval_wait_node๊ฐ cancel ์ ํธ๋ฅผ interrupt_reason์ผ๋ก ๋ณํ)
|
| 1879 |
+
return {
|
| 1880 |
+
"status": "cancelled",
|
| 1881 |
+
"thread_id": thread_id,
|
| 1882 |
+
"session_id": session_id,
|
| 1883 |
+
"graph_run_id": result.get("request_id", ""),
|
| 1884 |
+
}
|
| 1885 |
+
except HTTPException:
|
| 1886 |
+
raise
|
| 1887 |
+
except Exception as exc:
|
| 1888 |
+
logger.error(f"[v2/agent/cancel] ์์ธ ๋ฐ์: {exc}")
|
| 1889 |
+
return {
|
| 1890 |
+
"status": "error",
|
| 1891 |
+
"thread_id": thread_id,
|
| 1892 |
+
"error": str(exc),
|
| 1893 |
+
}
|
| 1894 |
+
|
| 1895 |
+
|
| 1896 |
+
if __name__ == "__main__":
|
| 1897 |
+
import uvicorn
|
| 1898 |
+
|
| 1899 |
+
uvicorn.run(app, **runtime_config.to_uvicorn_kwargs())
|
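For reference, a minimal client sketch of the two-step interrupt/approve contract implemented above. The base URL and the X-API-Key header name are assumptions (the header verify_api_key checks is defined elsewhere in this file); note that thread_id and approved are plain scalar parameters on /v2/agent/approve, so FastAPI expects them as query parameters.

import requests

BASE = "http://localhost:8000"      # assumed host/port; adjust to your deployment
HEADERS = {"X-API-Key": "dev-key"}  # assumed auth header name

# Step 1: run until the graph completes or interrupts at approval_wait.
run = requests.post(
    f"{BASE}/v2/agent/run",
    json={"query": "example complaint query"},
    headers=HEADERS,
).json()

# Step 2: resume the interrupted graph with an approval decision.
if run.get("status") == "awaiting_approval":
    done = requests.post(
        f"{BASE}/v2/agent/approve",
        params={"thread_id": run["thread_id"], "approved": True},  # query parameters
        headers=HEADERS,
    ).json()
    print(done["status"], done.get("text", ""))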
src/inference/bm25_indexer.py
ADDED
@@ -0,0 +1,446 @@
"""
BM25 Indexer for Korean civil complaint search.

Provides sparse keyword-based retrieval using morpheme analysis (Okt/Mecab)
and BM25Okapi ranking. Complements the dense FAISS retriever for hybrid search.

Issue: #153

Known limitation:
    BM25Okapi assigns negative IDF when a term appears in every document
    (df == N). search() returns only positive-scoring results, so a single-
    document corpus may return empty results for exact-match queries.
    In practice this does not occur at production scale (10k+ documents).

Security:
    Uses pickle for BM25Okapi serialization. Only load index files from
    trusted sources within the closed-network environment. When the
    BM25_INDEX_HMAC_KEY environment variable is set, save() signs the
    payload and load() verifies the HMAC before deserialization.
"""

import hashlib
import hmac
import json
import os
import pickle
from typing import List, Optional, Tuple

import numpy as np
from loguru import logger
from rank_bm25 import BM25Okapi

# Minimal Korean stopwords relevant to civil complaints
# Defined before KoreanTokenizer to avoid forward-reference maintenance hazard.
_STOPWORDS = frozenset(
    {
        "์ด๋ค",
        "์๋ค",
        "ํ๋ค",
        "๋๋ค",
        "์๋ค",
        "์๋ค",
        "์ด๋ฐ",
        "์ ๋ฐ",
        "๊ทธ๋ฐ",
        "ํฉ๋๋ค",
        "์๋๋ค",
        "์ต๋๋ค",
        "๋ฉ๋๋ค",
        "์์ต๋๋ค",
        "์์ต๋๋ค",
        "์์",
        "์ผ๋ก",
        "์๊ฒ",
        "๊น์ง",
        "๋ถํฐ",
        "์์๋",
        "์ผ๋ก๋",
        "๊ทธ๋ฆฌ๊ณ ",
        "ํ์ง๋ง",
        "๊ทธ๋ฌ๋",
        "๋ฐ๋ผ์",
        "๊ทธ๋์",
    }
)


class KoreanTokenizer:
    """
    Korean morpheme tokenizer with Mecab (preferred) and Okt (fallback).
    In closed-network environments where Mecab is not installed, Okt is used.
    """

    def __init__(self, tokenizer_type: str = "auto"):
        """
        Args:
            tokenizer_type: "mecab", "okt", or "auto" (tries Mecab first, falls back to Okt)
        """
        self.tokenizer_type = tokenizer_type
        self._tagger = None
        self._init_tokenizer(tokenizer_type)

    def _init_tokenizer(self, tokenizer_type: str) -> None:
        if tokenizer_type in ("mecab", "auto"):
            try:
                from konlpy.tag import Mecab

                self._tagger = Mecab()
                self.tokenizer_type = "mecab"
                logger.info("Tokenizer initialized: Mecab")
                return
            except Exception:
                if tokenizer_type == "mecab":
                    raise RuntimeError(
                        "Mecab is not installed. Install it or use tokenizer_type='okt'."
                    )
                logger.warning("Mecab unavailable, falling back to Okt.")

        # Okt path
        try:
            from konlpy.tag import Okt

            self._tagger = Okt()
            self.tokenizer_type = "okt"
            logger.info("Tokenizer initialized: Okt")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize any Korean tokenizer: {e}")

    def morphs(self, text: str) -> List[str]:
        """Tokenize text into morphemes, filtering stopwords and short tokens."""
        if not text or not text.strip():
            return []
        try:
            tokens = self._tagger.morphs(str(text))
            # Filter single characters and common stopwords
            return [t for t in tokens if len(t) > 1 and t not in _STOPWORDS]
        except Exception as e:
            logger.warning(
                f"Tokenization error (len={len(text)}): {type(e).__name__}. "
                "Falling back to whitespace split."
            )
            return [t for t in str(text).split() if len(t) > 1]


class BM25Indexer:
    """
    BM25 keyword index for civil complaint documents.

    Builds a sparse BM25Okapi index over tokenized Korean text,
    enabling keyword-exact matching for terms like law article numbers,
    department names, and specific complaint keywords.

    Return type note:
        search() returns List[Tuple[int, float]] - raw corpus indices and BM25
        scores. This is intentionally lower-level than CivilComplaintRetriever
        which returns List[Dict]. The HybridSearchEngine is responsible for
        mapping indices to metadata and fusing scores across both retrievers.

    Usage:
        indexer = BM25Indexer()
        indexer.build_index(documents)
        results = indexer.search("๋๋ก ํฌ์ฅ ๊ท ์ด ์ ๊ณ ", top_k=10)
        indexer.save("models/bm25_index/complaints.pkl")

        # Later:
        indexer2 = BM25Indexer()
        indexer2.load("models/bm25_index/complaints.pkl")
    """

    _PAYLOAD_VERSION = 1
    _HMAC_KEY_ENV = "BM25_INDEX_HMAC_KEY"

    def __init__(self, tokenizer_type: str = "auto"):
        self.tokenizer = KoreanTokenizer(tokenizer_type)
        self.bm25: Optional[BM25Okapi] = None
        self._tokenized_corpus: Optional[List[List[str]]] = None
        self._doc_count: int = 0

    def __repr__(self) -> str:
        return (
            f"BM25Indexer(docs={self._doc_count}, "
            f"tokenizer={self.tokenizer.tokenizer_type}, "
            f"ready={self.is_ready()})"
        )

    # ------------------------------------------------------------------
    # Index construction
    # ------------------------------------------------------------------

    def build_index(self, documents: List[str]) -> None:
        """
        Build BM25 index from a list of document strings.

        Args:
            documents: Raw text documents (one per entry).

        Raises:
            ValueError: If documents list is empty or all documents tokenize
                        to empty token lists (would cause ZeroDivisionError
                        inside BM25Okapi).
        """
        if not documents:
            raise ValueError("Document list is empty.")

        if self.bm25 is not None:
            logger.warning("Rebuilding BM25 index - existing index will be replaced.")

        logger.info(f"Tokenizing {len(documents)} documents...")
        tokenized = [self.tokenizer.morphs(doc) for doc in documents]

        empty_count = sum(1 for t in tokenized if not t)
        if empty_count:
            logger.warning(f"{empty_count} documents produced empty token lists.")

        # Guard against all-empty corpus which causes ZeroDivisionError in BM25Okapi
        if all(len(t) == 0 for t in tokenized):
            raise ValueError(
                "All documents produced empty token lists. "
                "Check that documents contain valid Korean text."
            )

        logger.info("Building BM25 index...")
        self._tokenized_corpus = tokenized
        self.bm25 = BM25Okapi(self._tokenized_corpus)
        self._doc_count = len(documents)
        logger.info(f"BM25 index built: {self._doc_count} documents.")

    def build_index_from_jsonl(self, data_path: str, text_field: str = "text") -> None:
        """
        Build index by loading documents from a JSONL file.

        Each line must be a JSON object with a field matching `text_field`.
        For files using EXAONE chat template format, the complaint content
        is extracted from the [|user|] section automatically.

        Args:
            data_path: Path to JSONL file.
            text_field: JSON field containing the text ("text" or "complaint").
        """
        if not os.path.exists(data_path):
            raise FileNotFoundError(f"Data file not found: {data_path}")

        documents = []
        with open(data_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    item = json.loads(line)
                    if text_field in item:
                        raw = item[text_field]
                        # Auto-extract complaint from EXAONE chat template
                        if isinstance(raw, str) and "[|user|]" in raw:
                            text = self._extract_complaint_from_template(raw)
                        else:
                            text = raw
                    elif "complaint" in item:
                        text = item["complaint"]
                    elif "input" in item:
                        text = item["input"]
                    else:
                        text = self._extract_complaint_from_template(item.get("text", ""))
                    # Ensure text is always a string
                    if not isinstance(text, str):
                        text = str(text) if text is not None else ""
                    documents.append(text)
                except (json.JSONDecodeError, KeyError) as e:
                    logger.warning(f"Line {line_no}: skipping due to error: {e}")

        logger.info(f"Loaded {len(documents)} documents from {data_path}")
        self.build_index(documents)

    @staticmethod
    def _extract_complaint_from_template(text: str) -> str:
        """Extract complaint content from EXAONE chat template format."""
        if not text:
            return text
        try:
            if "[|user|]" in text:
                user_part = text.split("[|user|]")[1].split("[|endofturn|]")[0]
                if "๋ฏผ์ ๋ด์ฉ:" in user_part:
                    return user_part.split("๋ฏผ์ ๋ด์ฉ:")[1].strip()
                return user_part.strip()
        except Exception as e:
            logger.debug(f"Template extraction fallback: {type(e).__name__}")
        return text

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def search(self, query: str, top_k: int = 10) -> List[Tuple[int, float]]:
        """
        Search the BM25 index and return top-k (index, score) pairs.

        Only positive-scoring documents are returned. Scores are raw BM25
        values and are not normalized - the HybridSearchEngine handles
        score fusion (e.g., RRF) across dense and sparse retrievers.

        Args:
            query: Korean query string.
            top_k: Number of results to return.

        Returns:
            List of (document_index, bm25_score) tuples, sorted by score desc.

        Raises:
            RuntimeError: If index has not been built or loaded.
        """
        if self.bm25 is None:
            raise RuntimeError("Index not built. Call build_index() first.")
        if not query or not query.strip():
            return []

        tokenized_query = self.tokenizer.morphs(query)
        if not tokenized_query:
            logger.warning("Query tokenized to empty list. Returning no results.")
            return []

        scores: np.ndarray = self.bm25.get_scores(tokenized_query)

        # Use argpartition O(N) instead of argsort O(N log N) for top-k selection
        actual_k = min(top_k, len(scores))
        if actual_k == 0:
            return []

        top_indices = np.argpartition(scores, -actual_k)[-actual_k:]
        top_indices = top_indices[np.argsort(scores[top_indices])[::-1]]

        results = [(int(idx), float(scores[idx])) for idx in top_indices if scores[idx] > 0.0]
        return results

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def save(self, path: str) -> None:
        """
        Serialize and save the BM25 index to disk.

        Security: Uses pickle for BM25Okapi serialization. When the
        ``BM25_INDEX_HMAC_KEY`` environment variable is set, the payload is
        signed with HMAC-SHA256 and a ``.sig`` sidecar file is written. Only
        load index files from trusted sources within the closed-network
        environment.

        Args:
            path: Destination file path (e.g., "models/bm25_index/complaints.pkl").
        """
        if self.bm25 is None:
            raise RuntimeError("Index not built. Call build_index() first.")

        # Fix: use abspath to avoid makedirs("") crash on bare filenames
        parent = os.path.dirname(os.path.abspath(path))
        os.makedirs(parent, exist_ok=True)

        payload = {
            "version": self._PAYLOAD_VERSION,
            "bm25": self.bm25,
            "tokenized_corpus": self._tokenized_corpus,
            "doc_count": self._doc_count,
            "tokenizer_type": self.tokenizer.tokenizer_type,
        }
        data = pickle.dumps(payload, protocol=pickle.HIGHEST_PROTOCOL)

        # HMAC signing (when key is configured)
        hmac_key = os.getenv(self._HMAC_KEY_ENV)
        if hmac_key:
            sig = hmac.new(hmac_key.encode(), data, hashlib.sha256).hexdigest()
            sig_path = path + ".sig"
            with open(sig_path, "w", encoding="utf-8") as sf:
                sf.write(sig)
            logger.info(f"HMAC signature written to {sig_path}")

        with open(path, "wb") as f:
            f.write(data)
        logger.info(f"BM25 index saved to {path} ({self._doc_count} documents).")

    def load(self, path: str) -> None:
        """
        Load a previously saved BM25 index from disk.

        Security: When the ``BM25_INDEX_HMAC_KEY`` environment variable is
        set, the HMAC-SHA256 signature is verified before deserialization.
        Pickle deserialization can execute arbitrary code - only load files
        from trusted sources within the closed-network environment.

        Args:
            path: Path to the pickle file saved by `save()`.

        Raises:
            FileNotFoundError: If the index file does not exist.
            ValueError: If the file is corrupt, has an incompatible schema,
                        or fails HMAC verification.
        """
        if not os.path.exists(path):
            raise FileNotFoundError(f"BM25 index file not found: {path}")

        with open(path, "rb") as f:
            data = f.read()

        # HMAC verification (when key is configured)
        hmac_key = os.getenv(self._HMAC_KEY_ENV)
        if hmac_key:
            sig_path = path + ".sig"
            if not os.path.exists(sig_path):
                raise ValueError(
                    f"HMAC signature file missing: {sig_path}. "
                    "Index file cannot be verified - rebuild the index."
                )
            with open(sig_path, "r", encoding="utf-8") as sf:
                expected_sig = sf.read().strip()
            actual_sig = hmac.new(hmac_key.encode(), data, hashlib.sha256).hexdigest()
            if not hmac.compare_digest(actual_sig, expected_sig):
                raise ValueError(
                    "BM25 index HMAC verification failed - file may be tampered. "
                    "Rebuild the index with a trusted data source."
                )
            logger.info("HMAC signature verified.")

        try:
            payload = pickle.loads(data)
        except Exception as e:
            raise ValueError(f"Failed to load BM25 index (corrupt or incompatible): {e}") from e

        # Payload version check
        saved_version = payload.get("version")
        if saved_version != self._PAYLOAD_VERSION:
            raise ValueError(
                f"BM25 index version mismatch: file has v{saved_version}, "
                f"expected v{self._PAYLOAD_VERSION}. Rebuild the index."
            )

        try:
            self.bm25 = payload["bm25"]
            self._tokenized_corpus = payload["tokenized_corpus"]
            self._doc_count = payload["doc_count"]
        except (KeyError, TypeError) as e:
            raise ValueError(
                f"BM25 index file has incompatible schema (missing key: {e}). Rebuild the index."
            ) from e

        saved_tokenizer = payload.get("tokenizer_type", "unknown")
        if saved_tokenizer != self.tokenizer.tokenizer_type:
            logger.warning(
                f"Tokenizer mismatch: index was built with '{saved_tokenizer}' "
                f"but current tokenizer is '{self.tokenizer.tokenizer_type}'. "
                "Search recall may be degraded. Rebuild the index to resolve."
            )

        logger.info(
            f"BM25 index loaded from {path} ({self._doc_count} documents, "
            f"tokenizer: {saved_tokenizer})."
        )

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------

    @property
    def doc_count(self) -> int:
        return self._doc_count

    def is_ready(self) -> bool:
        return self.bm25 is not None
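A minimal round-trip sketch of the HMAC-signed persistence path documented above; the key value, the document strings, and the file path are placeholders. With BM25_INDEX_HMAC_KEY set, save() writes a .sig sidecar and load() refuses unsigned or tampered files.

import os
from src.inference.bm25_indexer import BM25Indexer

os.environ["BM25_INDEX_HMAC_KEY"] = "replace-with-a-real-secret"  # placeholder key

documents = ["sample complaint text one", "sample complaint text two"]  # real corpora are Korean

indexer = BM25Indexer(tokenizer_type="auto")
indexer.build_index(documents)
indexer.save("models/bm25_index/complaints.pkl")   # also writes complaints.pkl.sig

restored = BM25Indexer(tokenizer_type="auto")
restored.load("models/bm25_index/complaints.pkl")  # raises ValueError if .sig is missing or mismatched
assert restored.doc_count == indexer.doc_count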
src/inference/db/__init__.py
ADDED
@@ -0,0 +1,67 @@
"""
GovOn RAG database module.

Provides SQLAlchemy 2.0 based ORM models, a CRUD layer, and conversion helpers.
"""

from src.inference.db.converters import (
    dataclass_to_orm,
    orm_to_dataclass,
    orm_to_pydantic,
)
from src.inference.db.crud import (  # DocumentSource; IndexingQueue; IndexVersion
    activate_version,
    create_document_source,
    create_index_version,
    create_indexing_queue_item,
    deactivate_versions,
    delete_document_source,
    get_active_version,
    get_by_source_type_and_id,
    get_document_source,
    get_document_sources,
    get_pending_items,
    get_queue_stats,
    update_document_source,
    update_queue_status,
)
from src.inference.db.database import SessionLocal, engine, get_db
from src.inference.db.models import (
    Base,
    DocumentSource,
    IndexingQueue,
    IndexVersion,
)

__all__ = [
    # Database infrastructure
    "engine",
    "SessionLocal",
    "get_db",
    "Base",
    # ORM models
    "DocumentSource",
    "IndexingQueue",
    "IndexVersion",
    # DocumentSource CRUD
    "create_document_source",
    "get_document_source",
    "get_document_sources",
    "update_document_source",
    "delete_document_source",
    "get_by_source_type_and_id",
    # IndexingQueue CRUD
    "create_indexing_queue_item",
    "get_pending_items",
    "update_queue_status",
    "get_queue_stats",
    # IndexVersion CRUD
    "create_index_version",
    "get_active_version",
    "deactivate_versions",
    "activate_version",
    # Conversion helpers
    "orm_to_dataclass",
    "dataclass_to_orm",
    "orm_to_pydantic",
]
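A minimal session-lifecycle sketch for this package. Base.metadata.create_all is a dev-only shortcut (Alembic owns the real schema), and passing the session as the first argument to the CRUD helpers is an assumption following the usual SQLAlchemy layering; the exact signatures live in crud.py.

from src.inference.db import Base, SessionLocal, engine, get_queue_stats

Base.metadata.create_all(bind=engine)  # dev convenience only; migrations are authoritative

with SessionLocal() as db:             # Session supports the context-manager protocol
    stats = get_queue_stats(db)        # assumed session-first signature; see crud.py
    print(stats)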
src/inference/db/alembic/env.py
ADDED
@@ -0,0 +1,82 @@
"""
Alembic environment configuration.

The connection string is injected via the DATABASE_URL environment variable.
The default is a SQLite file under the local GovOn home directory.
"""

# isort:skip_file
import logging
import os
import sys
from pathlib import Path
from logging.config import fileConfig

from alembic import context
from sqlalchemy import engine_from_config, pool

# Add the project root to sys.path so the models can be imported
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../..")))

from src.inference.db.models import Base  # noqa: E402

# Alembic Config object
config = context.config

# Logging configuration
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Target metadata (used for autogenerating migrations)
target_metadata = Base.metadata

# Read the DB URL from the environment
_DEFAULT_GOVON_HOME = Path(os.getenv("GOVON_HOME", Path.home() / ".govon"))
_DEFAULT_DATABASE_URL = f"sqlite:///{_DEFAULT_GOVON_HOME / 'metadata.sqlite3'}"
database_url = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URL)

if database_url == _DEFAULT_DATABASE_URL:
    logging.getLogger(__name__).warning(
        "DATABASE_URL ํ๊ฒฝ๋ณ์๊ฐ ์ค์ ๋์ง ์์ ๋ก์ปฌ SQLite ๊ธฐ๋ณธ๊ฐ์ ์ฌ์ฉํฉ๋๋ค. "
        "๋ณ๋ RDBMS๋ฅผ ์ฌ์ฉํ๋ ค๋ฉด DATABASE_URL์ ๋ช์์ ์ผ๋ก ์ค์ ํ์ธ์."
    )

config.set_main_option("sqlalchemy.url", database_url)


def run_migrations_offline() -> None:
    """Offline mode: generate the SQL script only, without a DB connection."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Online mode: connect to the DB directly and run the migrations."""
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
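A minimal sketch for driving this env.py programmatically rather than via the alembic CLI. The alembic.ini path and the DATABASE_URL value are placeholders; env.py above reads DATABASE_URL at import time, so it must be set before command.upgrade runs.

import os

from alembic import command
from alembic.config import Config

os.environ["DATABASE_URL"] = "sqlite:////tmp/govon-metadata.sqlite3"  # placeholder URL

cfg = Config("alembic.ini")   # placeholder: an ini whose script_location points at this env
command.upgrade(cfg, "head")  # applies 001_create_rag_tables and any later revisions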
src/inference/db/alembic/script.py.mako
ADDED
@@ -0,0 +1,25 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
src/inference/db/alembic/versions/001_create_rag_tables.py
ADDED
@@ -0,0 +1,346 @@
"""Create the core RAG tables: document_source, indexing_queue, index_version.

Initial migration merging the ADR-004 Section D schema with the Issue #152
requirements. Kept consistent with the ORM models (models.py).

Revision ID: 001
Revises: None
Create Date: 2026-03-22
"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects.postgresql import JSONB, UUID

# revision identifiers, used by Alembic.
revision: str = "001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # ------------------------------------------------------------------
    # 1. document_source table
    # ------------------------------------------------------------------
    op.create_table(
        "document_source",
        # PK
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
            comment="๋ฌธ์ ๊ณ ์ ์๋ณ์",
        ),
        # Common fields
        sa.Column(
            "source_type",
            sa.String(20),
            nullable=False,
            comment="๋ฌธ์ ํ์: case, law, manual, notice",
        ),
        sa.Column(
            "source_id",
            sa.String(255),
            nullable=False,
            comment="์๋ณธ ๋ฌธ์ ์๋ณ์",
        ),
        sa.Column(
            "source_name",
            sa.String(200),
            nullable=True,
            comment="์ถ์ฒ๋ช(AI Hub, ๋ฒ์ ์ฒ ๋ฑ)",
        ),
        sa.Column("title", sa.String(500), nullable=False, comment="๋ฌธ์ ์ ๋ชฉ"),
        sa.Column("content", sa.Text, nullable=False, comment="๋ฌธ์ ๋ณธ๋ฌธ"),
        sa.Column("category", sa.String(50), nullable=True, comment="์นดํ๊ณ ๋ฆฌ"),
        sa.Column(
            "chunk_index",
            sa.Integer,
            server_default="0",
            comment="์ฒญํฌ ์ธ๋ฑ์ค",
        ),
        sa.Column(
            "total_chunks",
            sa.Integer,
            server_default="1",
            comment="์ ์ฒด ์ฒญํฌ ์",
        ),
        sa.Column(
            "reliability_score",
            sa.Float,
            server_default="0.6",
            comment="์ ๋ขฐ๋ ์ ์ (0.0~1.0)",
        ),
        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=True, comment="์ ํจ ์์์ผ"),
        sa.Column("valid_until", sa.DateTime(timezone=True), nullable=True, comment="์ ํจ ์ข๋ฃ์ผ"),
        sa.Column(
            "status",
            sa.String(20),
            server_default=sa.text("'active'"),
            comment="๋ฌธ์ ์ํ: active, expired, deprecated",
        ),
        sa.Column(
            "version",
            sa.String(20),
            server_default=sa.text("'1.0'"),
            comment="๋ฌธ์ ๋ฒ์ ",
        ),
        # Extended metadata (JSONB)
        sa.Column(
            "metadata",
            JSONB,
            server_default=sa.text("'{}'::jsonb"),
            comment="์ถ๊ฐ ๋ฉํ๋ฐ์ดํฐ (JSONB)",
        ),
        # CASE type only
        sa.Column("complaint_text", sa.Text, nullable=True, comment="๋ฏผ์ ํ์คํธ (CASE ์ ์ฉ)"),
        sa.Column("answer_text", sa.Text, nullable=True, comment="๋ต๋ณ ํ์คํธ (CASE ์ ์ฉ)"),
        # LAW type only
        sa.Column("law_number", sa.String(100), nullable=True, comment="๋ฒ๋ฅ ๋ฒํธ (LAW ์ ์ฉ)"),
        sa.Column("article_number", sa.String(50), nullable=True, comment="์กฐํญ ๋ฒํธ (LAW ์ ์ฉ)"),
        sa.Column("enforcement_date", sa.Date, nullable=True, comment="์ํ์ผ (LAW ์ ์ฉ)"),
        # MANUAL type only
        sa.Column("department", sa.String(100), nullable=True, comment="๋ด๋น ๋ถ์ (MANUAL ์ ์ฉ)"),
        # NOTICE type only
        sa.Column(
            "notice_number", sa.String(100), nullable=True, comment="๊ณต์ ๋ฒํธ (NOTICE ์ ์ฉ)"
        ),
        sa.Column("effective_date", sa.Date, nullable=True, comment="์ํ์ผ (NOTICE ์ ์ฉ)"),
        # Indexing-related
        sa.Column("faiss_index_id", sa.Integer, nullable=True, comment="FAISS ์ธ๋ฑ์ค ๋ด ID"),
        sa.Column(
            "embedding_version",
            sa.String(50),
            server_default=sa.text("'e5-large-v1'"),
            comment="์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ฒ์ ",
        ),
        # Timestamps
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            comment="์์ฑ ์๊ฐ",
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            comment="์์ ์๊ฐ",
        ),
        # UNIQUE constraint
        sa.UniqueConstraint(
            "source_type",
            "source_id",
            "chunk_index",
            name="uq_source_type_source_id_chunk",
        ),
        # CHECK constraints
        sa.CheckConstraint(
            "source_type IN ('case', 'law', 'manual', 'notice')",
            name="ck_source_type_valid",
        ),
        sa.CheckConstraint(
            "status IN ('active', 'expired', 'deprecated')",
            name="ck_status_valid",
        ),
        sa.CheckConstraint(
            "reliability_score >= 0.0 AND reliability_score <= 1.0",
            name="ck_reliability_score_range",
        ),
    )

    # B-tree indexes on document_source
    op.create_index("idx_docsource_source_type", "document_source", ["source_type"])
    op.create_index("idx_docsource_status", "document_source", ["status"])
    op.create_index("idx_docsource_category", "document_source", ["category"])
    op.create_index("idx_docsource_valid_range", "document_source", ["valid_from", "valid_until"])

    # GIN index on document_source for JSONB search
    op.create_index(
        "idx_docsource_metadata",
        "document_source",
        ["metadata"],
        postgresql_using="gin",
    )

    # ------------------------------------------------------------------
    # 2. indexing_queue table
    # ------------------------------------------------------------------
    op.create_table(
        "indexing_queue",
        # PK
        sa.Column(
            "id",
            UUID(as_uuid=True),
            server_default=sa.text("gen_random_uuid()"),
            primary_key=True,
            comment="ํ ํญ๋ชฉ ๊ณ ์ ์๋ณ์",
        ),
        # document_source FK
        sa.Column(
            "document_id",
            UUID(as_uuid=True),
            sa.ForeignKey("document_source.id", ondelete="SET NULL"),
            nullable=True,
            comment="์ฐ๊ฒฐ๋ ๋ฌธ์ ์๋ณธ ID",
        ),
        # Reference fields (bare UUIDs without FKs; referenced tables do not exist yet)
        sa.Column(
            "session_id",
            UUID(as_uuid=True),
            nullable=True,
            comment="์๋ด ์ธ์ ID (FK ์์)",
        ),
        sa.Column(
            "message_id",
            UUID(as_uuid=True),
            nullable=True,
            comment="๋ฉ์์ง ID (FK ์์)",
        ),
        # Queue data
        sa.Column(
            "doc_type",
|
| 207 |
+
sa.String(20),
|
| 208 |
+
server_default=sa.text("'CASE'"),
|
| 209 |
+
comment="๋ฌธ์ ํ์
",
|
| 210 |
+
),
|
| 211 |
+
sa.Column("complaint_text", sa.Text, nullable=False, comment="๋ฏผ์ ํ
์คํธ"),
|
| 212 |
+
sa.Column("answer_text", sa.Text, nullable=False, comment="๋ต๋ณ ํ
์คํธ"),
|
| 213 |
+
sa.Column("category", sa.String(50), nullable=True, comment="์นดํ
๊ณ ๋ฆฌ"),
|
| 214 |
+
sa.Column(
|
| 215 |
+
"status",
|
| 216 |
+
sa.String(20),
|
| 217 |
+
server_default=sa.text("'pending'"),
|
| 218 |
+
comment="์ฒ๋ฆฌ ์ํ: pending, processing, completed, skipped, failed",
|
| 219 |
+
),
|
| 220 |
+
sa.Column(
|
| 221 |
+
"priority",
|
| 222 |
+
sa.Integer,
|
| 223 |
+
server_default="0",
|
| 224 |
+
comment="์ฐ์ ์์ (๋์์๋ก ๋จผ์ )",
|
| 225 |
+
),
|
| 226 |
+
sa.Column("skip_reason", sa.String(200), nullable=True, comment="๊ฑด๋๋ฐ๊ธฐ ์ฌ์ "),
|
| 227 |
+
# ํ์์คํฌํ
|
| 228 |
+
sa.Column(
|
| 229 |
+
"created_at",
|
| 230 |
+
sa.DateTime(timezone=True),
|
| 231 |
+
server_default=sa.func.now(),
|
| 232 |
+
comment="์์ฑ ์๊ฐ",
|
| 233 |
+
),
|
| 234 |
+
sa.Column(
|
| 235 |
+
"processed_at",
|
| 236 |
+
sa.DateTime(timezone=True),
|
| 237 |
+
nullable=True,
|
| 238 |
+
comment="์ฒ๋ฆฌ ์๋ฃ ์๊ฐ",
|
| 239 |
+
),
|
| 240 |
+
# CHECK ์ ์ฝ์กฐ๊ฑด
|
| 241 |
+
sa.CheckConstraint(
|
| 242 |
+
"status IN ('pending', 'processing', 'completed', 'skipped', 'failed')",
|
| 243 |
+
name="ck_queue_status_valid",
|
| 244 |
+
),
|
| 245 |
+
)
|
| 246 |
+
|
| 247 |
+
# indexing_queue ์ธ๋ฑ์ค
|
| 248 |
+
op.create_index("idx_indexqueue_status", "indexing_queue", ["status"])
|
| 249 |
+
op.create_index("idx_indexqueue_priority", "indexing_queue", ["priority", "created_at"])
|
| 250 |
+
op.create_index("idx_indexqueue_document_id", "indexing_queue", ["document_id"])
|
| 251 |
+
|
| 252 |
+
# ------------------------------------------------------------------
|
| 253 |
+
# 3. index_version ํ
์ด๋ธ
|
| 254 |
+
# ------------------------------------------------------------------
|
| 255 |
+
op.create_table(
|
| 256 |
+
"index_version",
|
| 257 |
+
# PK
|
| 258 |
+
sa.Column(
|
| 259 |
+
"id",
|
| 260 |
+
UUID(as_uuid=True),
|
| 261 |
+
server_default=sa.text("gen_random_uuid()"),
|
| 262 |
+
primary_key=True,
|
| 263 |
+
comment="์ธ๋ฑ์ค ๋ฒ์ ๊ณ ์ ์๋ณ์",
|
| 264 |
+
),
|
| 265 |
+
# ์ธ๋ฑ์ค ์ ๋ณด
|
| 266 |
+
sa.Column(
|
| 267 |
+
"index_type",
|
| 268 |
+
sa.String(20),
|
| 269 |
+
nullable=False,
|
| 270 |
+
comment="์ธ๋ฑ์ค ํ์
(case, law, manual, notice)",
|
| 271 |
+
),
|
| 272 |
+
sa.Column("version", sa.String(50), nullable=False, comment="์ธ๋ฑ์ค ๋ฒ์ "),
|
| 273 |
+
sa.Column("total_documents", sa.Integer, nullable=False, comment="ํฌํจ ๋ฌธ์ ์"),
|
| 274 |
+
sa.Column(
|
| 275 |
+
"index_file_path",
|
| 276 |
+
sa.String(500),
|
| 277 |
+
nullable=False,
|
| 278 |
+
comment="FAISS ๏ฟฝ๏ฟฝ๏ฟฝ๋ฑ์ค ํ์ผ ๊ฒฝ๋ก",
|
| 279 |
+
),
|
| 280 |
+
sa.Column(
|
| 281 |
+
"meta_file_path",
|
| 282 |
+
sa.String(500),
|
| 283 |
+
nullable=False,
|
| 284 |
+
comment="๋ฉํ๋ฐ์ดํฐ ํ์ผ ๊ฒฝ๋ก",
|
| 285 |
+
),
|
| 286 |
+
sa.Column(
|
| 287 |
+
"snapshot_path",
|
| 288 |
+
sa.Text,
|
| 289 |
+
nullable=True,
|
| 290 |
+
comment="์ค๋
์ท ๊ฒฝ๋ก",
|
| 291 |
+
),
|
| 292 |
+
# ์ํ
|
| 293 |
+
sa.Column(
|
| 294 |
+
"built_at",
|
| 295 |
+
sa.DateTime(timezone=True),
|
| 296 |
+
server_default=sa.func.now(),
|
| 297 |
+
comment="๋น๋ ์๊ฐ",
|
| 298 |
+
),
|
| 299 |
+
sa.Column(
|
| 300 |
+
"is_active",
|
| 301 |
+
sa.Boolean,
|
| 302 |
+
server_default=sa.text("true"),
|
| 303 |
+
comment="ํ์ฑ ๋ฒ์ ์ฌ๋ถ",
|
| 304 |
+
),
|
| 305 |
+
sa.Column(
|
| 306 |
+
"build_duration_seconds",
|
| 307 |
+
sa.Float,
|
| 308 |
+
nullable=True,
|
| 309 |
+
comment="๋น๋ ์์ ์๊ฐ (์ด)",
|
| 310 |
+
),
|
| 311 |
+
sa.Column("notes", sa.Text, nullable=True, comment="๋น๊ณ "),
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
# index_version ์ธ๋ฑ์ค
|
| 315 |
+
op.create_index("idx_indexversion_active", "index_version", ["index_type", "is_active"])
|
| 316 |
+
|
| 317 |
+
# ------------------------------------------------------------------
|
| 318 |
+
# 4. updated_at ์๋ ๊ฐฑ์ ํธ๋ฆฌ๊ฑฐ (document_source)
|
| 319 |
+
# ------------------------------------------------------------------
|
| 320 |
+
op.execute("""
|
| 321 |
+
CREATE OR REPLACE FUNCTION update_updated_at_column()
|
| 322 |
+
RETURNS TRIGGER AS $$
|
| 323 |
+
BEGIN
|
| 324 |
+
NEW.updated_at = NOW();
|
| 325 |
+
RETURN NEW;
|
| 326 |
+
END;
|
| 327 |
+
$$ LANGUAGE plpgsql;
|
| 328 |
+
""")
|
| 329 |
+
|
| 330 |
+
op.execute("""
|
| 331 |
+
CREATE TRIGGER trg_docsource_updated_at
|
| 332 |
+
BEFORE UPDATE ON document_source
|
| 333 |
+
FOR EACH ROW
|
| 334 |
+
EXECUTE FUNCTION update_updated_at_column();
|
| 335 |
+
""")
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def downgrade() -> None:
|
| 339 |
+
# ํธ๋ฆฌ๊ฑฐ ๋ฐ ํจ์ ์ ๊ฑฐ
|
| 340 |
+
op.execute("DROP TRIGGER IF EXISTS trg_docsource_updated_at ON document_source;")
|
| 341 |
+
op.execute("DROP FUNCTION IF EXISTS update_updated_at_column();")
|
| 342 |
+
|
| 343 |
+
# ํ
์ด๋ธ ์ ๊ฑฐ (์์กด์ฑ ์ญ์)
|
| 344 |
+
op.drop_table("index_version")
|
| 345 |
+
op.drop_table("indexing_queue")
|
| 346 |
+
op.drop_table("document_source")
|
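A minimal sketch of applying this revision programmatically (assuming the project's alembic.ini sits at the repository root; that path is an assumption, not confirmed by the repo):

    # Hypothetical driver script, not part of the migration itself.
    from alembic import command
    from alembic.config import Config

    cfg = Config("alembic.ini")   # reads script_location and sqlalchemy.url
    command.upgrade(cfg, "head")  # runs upgrade() of revision "001" and any later ones

Note that gen_random_uuid() is built into PostgreSQL 13+; on older servers the pgcrypto extension must be enabled before this migration runs.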
src/inference/db/converters.py
ADDED
@@ -0,0 +1,162 @@
"""
ORM <-> dataclass / Pydantic conversion helpers.

Converts DocumentSource ORM instances to and from the existing
DocumentMetadata (dataclass) and DocumentMetadataSchema (Pydantic) models.
"""

from datetime import datetime
from typing import Any, Dict

from src.inference.db.models import DocumentSource
from src.inference.index_manager import DocumentMetadata, IndexType
from src.inference.schemas import DocumentMetadataSchema

# Type-specific fields (shared by the ORM <-> dataclass/Pydantic converters)
_TYPE_SPECIFIC_FIELDS: tuple = (
    "complaint_text",
    "answer_text",  # CASE
    "law_number",
    "article_number",  # LAW
    "enforcement_date",  # LAW
    "department",  # MANUAL
    "notice_number",
    "effective_date",  # NOTICE
)


# ---------------------------------------------------------------------------
# ORM -> Dataclass
# ---------------------------------------------------------------------------


def orm_to_dataclass(doc_source: DocumentSource) -> DocumentMetadata:
    """Convert a DocumentSource ORM instance to a DocumentMetadata dataclass.

    The ORM's type-specific fields (complaint_text, law_number, etc.) are
    collected into the extras dict.
    """
    # Collect the type-specific fields into extras
    extras: Dict[str, Any] = {}
    if doc_source.metadata_:
        extras.update(doc_source.metadata_)

    for field_name in _TYPE_SPECIFIC_FIELDS:
        value = getattr(doc_source, field_name, None)
        if value is not None:
            # Serialize date/datetime objects to ISO strings
            extras[field_name] = value.isoformat() if hasattr(value, "isoformat") else value

    return DocumentMetadata(
        doc_id=str(doc_source.id),
        doc_type=doc_source.source_type,
        source=doc_source.source_name or "",
        title=doc_source.title,
        category=doc_source.category or "",
        reliability_score=doc_source.reliability_score,
        created_at=doc_source.created_at.isoformat(),
        updated_at=doc_source.updated_at.isoformat(),
        valid_from=(doc_source.valid_from.isoformat() if doc_source.valid_from else None),
        valid_until=(doc_source.valid_until.isoformat() if doc_source.valid_until else None),
        chunk_index=doc_source.chunk_index,
        chunk_total=doc_source.total_chunks,
        extras=extras,
    )


# ---------------------------------------------------------------------------
# Dataclass -> ORM create kwargs
# ---------------------------------------------------------------------------


def dataclass_to_orm(meta: DocumentMetadata, content: str) -> Dict[str, Any]:
    """Build kwargs for creating a DocumentSource from a DocumentMetadata dataclass.

    Parameters
    ----------
    meta : DocumentMetadata
        Internal dataclass instance.
    content : str
        Document body text (the dataclass has no content field).

    Returns
    -------
    dict
        kwargs to pass to crud.create_document_source().
    """
    extras = dict(meta.extras) if meta.extras else {}

    kwargs: Dict[str, Any] = {
        "source_type": meta.doc_type,
        "source_id": meta.doc_id,
        "source_name": meta.source,
        "title": meta.title,
        "content": content,
        "category": meta.category,
        "chunk_index": meta.chunk_index,
        "total_chunks": meta.chunk_total,
        "reliability_score": meta.reliability_score,
        "metadata_": {},
    }

    # Convert ISO strings to datetime (valid_from/valid_until)
    if meta.valid_from:
        kwargs["valid_from"] = datetime.fromisoformat(meta.valid_from)
    if meta.valid_until:
        kwargs["valid_until"] = datetime.fromisoformat(meta.valid_until)

    # Pull the type-specific fields out of extras
    _type_field_map = {
        "complaint_text": str,
        "answer_text": str,
        "law_number": str,
        "article_number": str,
        "enforcement_date": str,  # DATE column, so the string is passed through
        "department": str,
        "notice_number": str,
        "effective_date": str,
    }
    remaining_extras: Dict[str, Any] = {}
    for key, value in extras.items():
        if key in _type_field_map:
            kwargs[key] = value
        else:
            remaining_extras[key] = value

    kwargs["metadata_"] = remaining_extras
    return kwargs


# ---------------------------------------------------------------------------
# ORM -> Pydantic
# ---------------------------------------------------------------------------


def orm_to_pydantic(doc_source: DocumentSource) -> DocumentMetadataSchema:
    """Convert a DocumentSource ORM instance to a DocumentMetadataSchema Pydantic model."""
    # Merge the type-specific fields with the JSONB metadata
    extra_meta: Dict[str, Any] = {}
    if doc_source.metadata_:
        extra_meta.update(doc_source.metadata_)

    for field_name in _TYPE_SPECIFIC_FIELDS:
        value = getattr(doc_source, field_name, None)
        if value is not None:
            extra_meta[field_name] = value.isoformat() if hasattr(value, "isoformat") else value

    return DocumentMetadataSchema(
        doc_id=str(doc_source.id),
        source_type=IndexType(doc_source.source_type),
        source_id=doc_source.source_id,
        title=doc_source.title,
        content=doc_source.content,
        chunk_index=doc_source.chunk_index,
        total_chunks=doc_source.total_chunks,
        created_at=doc_source.created_at,
        updated_at=doc_source.updated_at,
        valid_from=doc_source.valid_from,
        valid_until=doc_source.valid_until,
        reliability_score=doc_source.reliability_score,
        metadata=extra_meta,
    )
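A round-trip sketch (illustrative; `doc` is assumed to be an already-loaded DocumentSource row):

    meta = orm_to_dataclass(doc)                  # type-specific columns land in meta.extras
    kwargs = dataclass_to_orm(meta, content=doc.content)
    # kwargs carries the type-specific fields back as top-level columns and can
    # be passed to crud.create_document_source(db, **kwargs).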
src/inference/db/crud.py
ADDED
@@ -0,0 +1,306 @@
"""
CRUD layer (Unit of Work pattern).

Provides create/read/update/delete functions for the DocumentSource,
IndexingQueue, and IndexVersion tables. Every function takes a synchronous
Session as an argument.

Functions in this module never commit. Transaction commit/rollback control is
the responsibility of the caller (the service layer). To keep composite
operations atomic, functions only flush, sending SQL to the DB while leaving
the final confirmation to the caller.
"""

import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from sqlalchemy import func, select, update
from sqlalchemy.orm import Session

from src.inference.db.models import DocumentSource, IndexingQueue, IndexVersion

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

MAX_LIMIT = 1000

_ALLOWED_FILTER_COLUMNS = frozenset(
    {
        "source_type",
        "source_id",
        "status",
        "category",
        "source_name",
        "embedding_version",
        "version",
    }
)

_IMMUTABLE_FIELDS = frozenset({"id", "created_at"})

_VALID_QUEUE_STATUSES = frozenset(
    {
        "pending",
        "processing",
        "completed",
        "skipped",
        "failed",
    }
)


# ============================================================================
# DocumentSource CRUD
# ============================================================================


def create_document_source(db: Session, **kwargs: Any) -> DocumentSource:
    """Create a new source document record."""
    doc = DocumentSource(**kwargs)
    db.add(doc)
    db.flush()
    db.refresh(doc)
    return doc


def get_document_source(db: Session, doc_id: uuid.UUID) -> Optional[DocumentSource]:
    """Fetch a source document by ID."""
    return db.get(DocumentSource, doc_id)


def get_document_sources(
    db: Session,
    filters: Optional[Dict[str, Any]] = None,
    skip: int = 0,
    limit: int = 100,
) -> List[DocumentSource]:
    """Fetch source documents matching the filter conditions.

    Parameters
    ----------
    filters : dict, optional
        Column-name/value filter dictionary.
        e.g. {"source_type": "case", "status": "active"}
    skip : int
        Number of rows to skip (pagination offset).
    limit : int
        Maximum number of rows to return.
    """
    limit = min(limit, MAX_LIMIT)
    stmt = select(DocumentSource)

    if filters:
        for col_name, value in filters.items():
            if col_name in _ALLOWED_FILTER_COLUMNS:
                stmt = stmt.where(getattr(DocumentSource, col_name) == value)

    stmt = stmt.offset(skip).limit(limit).order_by(DocumentSource.created_at.desc())
    return list(db.scalars(stmt).all())


def update_document_source(
    db: Session, doc_id: uuid.UUID, **kwargs: Any
) -> Optional[DocumentSource]:
    """Update a source document record.

    Pass the columns/values to change as kwargs.
    """
    doc = db.get(DocumentSource, doc_id)
    if doc is None:
        return None

    for key, value in kwargs.items():
        if key in _IMMUTABLE_FIELDS:
            continue
        if hasattr(doc, key):
            setattr(doc, key, value)

    db.flush()
    db.refresh(doc)
    return doc


def delete_document_source(db: Session, doc_id: uuid.UUID) -> bool:
    """Delete a source document record. Returns True on success."""
    doc = db.get(DocumentSource, doc_id)
    if doc is None:
        return False

    db.delete(doc)
    db.flush()
    return True


def get_by_source_type_and_id(
    db: Session, source_type: str, source_id: str
) -> List[DocumentSource]:
    """Fetch documents by the (source_type, source_id) combination.

    Returns a list because the same document may span multiple chunks.
    """
    stmt = (
        select(DocumentSource)
        .where(
            DocumentSource.source_type == source_type,
            DocumentSource.source_id == source_id,
        )
        .order_by(DocumentSource.chunk_index)
    )
    return list(db.scalars(stmt).all())


# ============================================================================
# IndexingQueue CRUD
# ============================================================================


def create_indexing_queue_item(db: Session, **kwargs: Any) -> IndexingQueue:
    """Add a new item to the indexing queue."""
    item = IndexingQueue(**kwargs)
    db.add(item)
    db.flush()
    db.refresh(item)
    return item


def get_pending_items(db: Session, limit: int = 50) -> List[IndexingQueue]:
    """Fetch pending queue items in descending priority order."""
    limit = min(limit, MAX_LIMIT)
    stmt = (
        select(IndexingQueue)
        .where(IndexingQueue.status == "pending")
        .order_by(IndexingQueue.priority.desc(), IndexingQueue.created_at)
        .limit(limit)
    )
    return list(db.scalars(stmt).all())


def update_queue_status(
    db: Session,
    item_id: uuid.UUID,
    status: str,
    skip_reason: Optional[str] = None,
) -> Optional[IndexingQueue]:
    """Change the status of a queue item.

    processed_at is set automatically when moving to completed, failed, or
    skipped.
    """
    if status not in _VALID_QUEUE_STATUSES:
        raise ValueError(
            f"Invalid status: {status!r}. "
            f"Allowed values: {', '.join(sorted(_VALID_QUEUE_STATUSES))}"
        )

    item = db.get(IndexingQueue, item_id)
    if item is None:
        return None

    item.status = status
    if skip_reason is not None:
        item.skip_reason = skip_reason

    if status in ("completed", "failed", "skipped"):
        item.processed_at = datetime.now(timezone.utc)

    db.flush()
    db.refresh(item)
    return item


def get_queue_stats(db: Session) -> Dict[str, int]:
    """Aggregate queue item counts by status.

    Returns
    -------
    dict
        {"pending": 10, "processing": 2, "completed": 50, ...}
    """
    stmt = select(IndexingQueue.status, func.count()).group_by(IndexingQueue.status)
    rows = db.execute(stmt).all()
    return {status: count for status, count in rows}


# ============================================================================
# IndexVersion CRUD
# ============================================================================


def create_index_version(db: Session, **kwargs: Any) -> IndexVersion:
    """Create a new index version record."""
    ver = IndexVersion(**kwargs)
    db.add(ver)
    db.flush()
    db.refresh(ver)
    return ver


def get_active_version(db: Session, index_type: str) -> Optional[IndexVersion]:
    """Fetch the active version for a given index_type.

    There should be at most one active version per index_type.
    """
    stmt = (
        select(IndexVersion)
        .where(
            IndexVersion.index_type == index_type,
            IndexVersion.is_active.is_(True),
        )
        .order_by(IndexVersion.built_at.desc())
        .limit(1)
    )
    return db.scalars(stmt).first()


def deactivate_versions(db: Session, index_type: str) -> int:
    """Deactivate every active version of a given index_type.

    Called before activating a new index to guarantee a single active version.

    Returns
    -------
    int
        Number of records deactivated.
    """
    stmt = (
        update(IndexVersion)
        .where(
            IndexVersion.index_type == index_type,
            IndexVersion.is_active.is_(True),
        )
        .values(is_active=False)
    )
    result = db.execute(stmt)
    db.flush()
    return result.rowcount  # type: ignore[return-value]


def activate_version(db: Session, version_id: uuid.UUID) -> Optional[IndexVersion]:
    """Activate a specific index version.

    Existing active versions of the same index_type are deactivated first,
    then this version is activated in the same transaction.

    Race-condition guard:
        Row-level locks on every version of the same index_type are acquired
        with SELECT ... FOR UPDATE before the deactivate/activate steps.
        Concurrent callers block until the lock is released, preventing
        multiple active versions from appearing.
        (PostgreSQL only: SQLite does not support FOR UPDATE.)
    """
    ver = db.get(IndexVersion, version_id)
    if ver is None:
        return None

    # Acquire row-level locks on every version of this index_type (PostgreSQL only)
    lock_stmt = (
        select(IndexVersion).where(IndexVersion.index_type == ver.index_type).with_for_update()
    )
    db.execute(lock_stmt)

    # With the locks held, deactivate existing active versions of the same type
    deactivate_versions(db, ver.index_type)

    ver.is_active = True
    db.flush()
    db.refresh(ver)
    return ver
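Because the CRUD layer only flushes, a service-layer caller owns the transaction boundary. A minimal sketch (field values are illustrative):

    from src.inference.db.crud import create_document_source, create_indexing_queue_item
    from src.inference.db.database import SessionLocal

    db = SessionLocal()
    try:
        doc = create_document_source(
            db, source_type="case", source_id="case-001", title="Sample", content="..."
        )
        create_indexing_queue_item(
            db, document_id=doc.id, complaint_text="...", answer_text="..."
        )
        db.commit()    # a single commit makes both inserts atomic
    except Exception:
        db.rollback()  # neither row is persisted if either insert fails
        raise
    finally:
        db.close()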
src/inference/db/database.py
ADDED
@@ -0,0 +1,77 @@
"""
SQLAlchemy 2.0 database engine/session setup.

Built around synchronous sessions and provides the FastAPI dependency (get_db).
Defaults to a SQLite file under the GovOn home directory, matching the local
single-user MVP.
"""

import logging
import os
from pathlib import Path
from typing import Generator

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Engine & session factory
# ---------------------------------------------------------------------------

_DEFAULT_GOVON_HOME = Path(os.getenv("GOVON_HOME", Path.home() / ".govon"))
_DEFAULT_DATABASE_URL = f"sqlite:///{_DEFAULT_GOVON_HOME / 'metadata.sqlite3'}"

DATABASE_URL: str = os.getenv("DATABASE_URL", _DEFAULT_DATABASE_URL)

if DATABASE_URL == _DEFAULT_DATABASE_URL:
    logger.warning(
        "The DATABASE_URL environment variable is not set; using the local "
        "SQLite default. Set DATABASE_URL explicitly to use a separate RDBMS."
    )

engine_kwargs = {
    "echo": os.getenv("SQL_ECHO", "").lower() in ("1", "true"),
}
if DATABASE_URL.startswith("sqlite:///"):
    _DEFAULT_GOVON_HOME.mkdir(parents=True, exist_ok=True)
    engine_kwargs["connect_args"] = {"check_same_thread": False}
else:
    engine_kwargs.update(
        {
            "pool_size": 10,
            "max_overflow": 20,
            "pool_pre_ping": True,
            "pool_recycle": 3600,
        }
    )

engine = create_engine(DATABASE_URL, **engine_kwargs)

SessionLocal = sessionmaker(
    bind=engine,
    autocommit=False,
    autoflush=False,
)


# ---------------------------------------------------------------------------
# FastAPI dependency injection
# ---------------------------------------------------------------------------


def get_db() -> Generator[Session, None, None]:
    """Session generator for FastAPI Depends().

    Usage example::

        @router.get("/docs")
        def list_docs(db: Session = Depends(get_db)):
            ...
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.rollback()
        db.close()
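The same generator also works outside FastAPI, for scripts and tests, when wrapped with contextlib (a sketch, not part of the module):

    from contextlib import contextmanager

    session_scope = contextmanager(get_db)  # get_db is a plain generator function

    with session_scope() as db:
        ...  # use db; the finally block still rolls back and closes on exit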
src/inference/db/models.py
ADDED
@@ -0,0 +1,448 @@
"""
SQLAlchemy 2.0 ORM models.

Three tables based on the merged ADR-004 + Issue #152 schema:
- DocumentSource : source document metadata
- IndexingQueue  : indexing queue
- IndexVersion   : FAISS index version management

All models use the SQLAlchemy 2.0 Mapped style (mapped_column, Mapped) and
declare DB-level defaults via server_default.
"""

import uuid
from datetime import date, datetime
from typing import Any, Dict, List, Optional

from sqlalchemy import (
    Boolean,
    CheckConstraint,
    Date,
    DateTime,
    Float,
    ForeignKey,
    Index,
    Integer,
    String,
    Text,
    UniqueConstraint,
    func,
    text,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    mapped_column,
    relationship,
)

# ---------------------------------------------------------------------------
# Base class
# ---------------------------------------------------------------------------


class Base(DeclarativeBase):
    """Common base class for all ORM models."""

    pass


# ---------------------------------------------------------------------------
# DocumentSource
# ---------------------------------------------------------------------------


class DocumentSource(Base):
    """ORM model for the document_source table.

    Manages metadata for every document type (case, law, manual, notice) in
    one table. Type-specific columns are nullable and stay NULL for the other
    types.
    """

    __tablename__ = "document_source"
    __table_args__ = (
        UniqueConstraint(
            "source_type",
            "source_id",
            "chunk_index",
            name="uq_source_type_source_id_chunk",
        ),
        CheckConstraint(
            "source_type IN ('case', 'law', 'manual', 'notice')",
            name="ck_source_type_valid",
        ),
        CheckConstraint(
            "status IN ('active', 'expired', 'deprecated')",
            name="ck_status_valid",
        ),
        CheckConstraint(
            "reliability_score >= 0.0 AND reliability_score <= 1.0",
            name="ck_reliability_score_range",
        ),
        # Performance indexes
        Index("idx_docsource_source_type", "source_type"),
        Index("idx_docsource_status", "status"),
        Index("idx_docsource_category", "category"),
        Index("idx_docsource_valid_range", "valid_from", "valid_until"),
        Index("idx_docsource_metadata", "metadata", postgresql_using="gin"),
    )

    # -- Primary key --
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("gen_random_uuid()"),
        comment="Unique document identifier",
    )

    # -- Common fields --
    source_type: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        comment="Document type: case, law, manual, notice",
    )
    source_id: Mapped[str] = mapped_column(
        String(255),
        nullable=False,
        comment="Source document identifier",
    )
    source_name: Mapped[Optional[str]] = mapped_column(
        String(200),
        nullable=True,
        comment="Source name (AI Hub, Ministry of Government Legislation, etc.)",
    )
    title: Mapped[str] = mapped_column(
        String(500),
        nullable=False,
        comment="Document title",
    )
    content: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Document body",
    )
    category: Mapped[Optional[str]] = mapped_column(
        String(50),
        nullable=True,
        comment="Category (roads/traffic, environment/sanitation, etc.)",
    )
    chunk_index: Mapped[int] = mapped_column(
        Integer,
        server_default=text("0"),
        comment="Chunk index",
    )
    total_chunks: Mapped[int] = mapped_column(
        Integer,
        server_default=text("1"),
        comment="Total number of chunks",
    )
    reliability_score: Mapped[float] = mapped_column(
        Float,
        server_default=text("0.6"),
        comment="Reliability score (0.0~1.0)",
    )
    valid_from: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Validity start date",
    )
    valid_until: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Validity end date",
    )
    status: Mapped[str] = mapped_column(
        String(20),
        server_default=text("'active'"),
        comment="Document status: active, expired, deprecated",
    )
    version: Mapped[str] = mapped_column(
        String(20),
        server_default=text("'1.0'"),
        comment="Document version",
    )
    # 'metadata' is reserved by SQLAlchemy's Declarative API, so the Python
    # attribute is mapped as metadata_
    metadata_: Mapped[Dict[str, Any]] = mapped_column(
        "metadata",
        JSONB,
        server_default=text("'{}'::jsonb"),
        comment="Additional metadata (JSONB)",
    )

    # -- CASE only --
    complaint_text: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
        comment="Complaint text (CASE only)",
    )
    answer_text: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
        comment="Answer text (CASE only)",
    )

    # -- LAW only --
    law_number: Mapped[Optional[str]] = mapped_column(
        String(100),
        nullable=True,
        comment="Law number (LAW only)",
    )
    article_number: Mapped[Optional[str]] = mapped_column(
        String(50),
        nullable=True,
        comment="Article number (LAW only)",
    )
    enforcement_date: Mapped[Optional[date]] = mapped_column(
        Date,
        nullable=True,
        comment="Enforcement date (LAW only)",
    )

    # -- MANUAL only --
    department: Mapped[Optional[str]] = mapped_column(
        String(100),
        nullable=True,
        comment="Responsible department (MANUAL only)",
    )

    # -- NOTICE only --
    notice_number: Mapped[Optional[str]] = mapped_column(
        String(100),
        nullable=True,
        comment="Notice number (NOTICE only)",
    )
    effective_date: Mapped[Optional[date]] = mapped_column(
        Date,
        nullable=True,
        comment="Effective date (NOTICE only)",
    )

    # -- Indexing --
    faiss_index_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        nullable=True,
        comment="ID within the FAISS index",
    )
    embedding_version: Mapped[str] = mapped_column(
        String(50),
        server_default=text("'e5-large-v1'"),
        comment="Embedding model version",
    )

    # -- Timestamps --
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        comment="Creation time",
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        comment="Last modified time",
    )

    # -- Relationships --
    queue_items: Mapped[List["IndexingQueue"]] = relationship(
        back_populates="document",
        cascade="all, delete-orphan",
        lazy="select",
    )

    def __repr__(self) -> str:
        return f"<DocumentSource(id={self.id}, type={self.source_type}, title={self.title!r})>"


# ---------------------------------------------------------------------------
# IndexingQueue
# ---------------------------------------------------------------------------


class IndexingQueue(Base):
    """ORM model for the indexing_queue table.

    New civil-complaint consultations are appended to the indexing queue, and
    a batch process periodically consumes the pending items.
    """

    __tablename__ = "indexing_queue"
    __table_args__ = (
        CheckConstraint(
            "status IN ('pending', 'processing', 'completed', 'skipped', 'failed')",
            name="ck_queue_status_valid",
        ),
        # Performance indexes
        Index("idx_indexqueue_status", "status"),
        Index("idx_indexqueue_priority", "priority", "created_at"),
        Index("idx_indexqueue_document_id", "document_id"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("gen_random_uuid()"),
        comment="Unique queue item identifier",
    )
    document_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("document_source.id", ondelete="SET NULL"),
        nullable=True,
        comment="Linked source document ID",
    )
    session_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
        comment="Consultation session ID (no FK)",
    )
    message_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
        comment="Message ID (no FK)",
    )
    doc_type: Mapped[str] = mapped_column(
        String(20),
        server_default=text("'CASE'"),
        comment="Document type",
    )
    complaint_text: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Complaint text",
    )
    answer_text: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        comment="Answer text",
    )
    category: Mapped[Optional[str]] = mapped_column(
        String(50),
        nullable=True,
        comment="Category",
    )
    status: Mapped[str] = mapped_column(
        String(20),
        server_default=text("'pending'"),
        comment="Processing status: pending, processing, completed, skipped, failed",
    )
    priority: Mapped[int] = mapped_column(
        Integer,
        server_default=text("0"),
        comment="Priority (higher runs first)",
    )
    skip_reason: Mapped[Optional[str]] = mapped_column(
        String(200),
        nullable=True,
        comment="Skip reason",
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        comment="Creation time",
    )
    processed_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        comment="Processing completion time",
    )

    # -- Relationships --
    document: Mapped[Optional["DocumentSource"]] = relationship(
        back_populates="queue_items",
        lazy="select",
    )

    def __repr__(self) -> str:
        return f"<IndexingQueue(id={self.id}, status={self.status}, doc_type={self.doc_type})>"


# ---------------------------------------------------------------------------
# IndexVersion
# ---------------------------------------------------------------------------


class IndexVersion(Base):
    """ORM model for the index_version table.

    Tracks FAISS index build history and keeps at most one active version per
    index_type.
    """

    __tablename__ = "index_version"
    __table_args__ = (
        CheckConstraint(
            "index_type IN ('case', 'law', 'manual', 'notice')",
            name="ck_index_type_valid",
        ),
        Index("idx_indexversion_active", "index_type", "is_active"),
        Index(
            "uq_indexversion_one_active_per_type",
            "index_type",
            unique=True,
            postgresql_where=text("is_active = true"),
        ),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        server_default=text("gen_random_uuid()"),
        comment="Unique index version identifier",
    )
    index_type: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        comment="Index type (case, law, manual, notice)",
    )
    version: Mapped[str] = mapped_column(
        String(50),
        nullable=False,
        comment="Index version (e.g. v1.0.0)",
    )
    total_documents: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        comment="Number of documents included",
    )
    index_file_path: Mapped[str] = mapped_column(
        String(500),
        nullable=False,
        comment="FAISS index file path",
    )
    meta_file_path: Mapped[str] = mapped_column(
        String(500),
        nullable=False,
        comment="Metadata file path",
    )
    snapshot_path: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
        comment="Snapshot path",
    )
    built_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        comment="Build time",
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        server_default=text("true"),
        comment="Whether this is the active version",
    )
    build_duration_seconds: Mapped[Optional[float]] = mapped_column(
        Float,
        nullable=True,
        comment="Build duration (seconds)",
    )
    notes: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
        comment="Notes",
    )

    def __repr__(self) -> str:
        return (
            f"<IndexVersion(id={self.id}, "
            f"type={self.index_type}, version={self.version}, "
            f"active={self.is_active})>"
        )
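One design note: the partial unique index uq_indexversion_one_active_per_type enforces "at most one active version per index_type" at the database level, but the postgresql_where clause only takes effect on PostgreSQL. A quick sketch for creating the tables in a local experiment (the DSN is hypothetical; JSONB and gen_random_uuid() require PostgreSQL):

    from sqlalchemy import create_engine
    from src.inference.db.models import Base

    engine = create_engine("postgresql+psycopg2://govon:govon@localhost:5432/govon")
    Base.metadata.create_all(engine)  # emits CREATE TABLE for all three models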
src/inference/document_processor.py
ADDED
@@ -0,0 +1,545 @@
"""
DocumentProcessor: multi-format document parsing and hybrid chunking.

Issue #156: integrates PDF (PyMuPDF), HWP, and TXT parsers, then performs
hybrid chunking: semantic units (articles/clauses/items, paragraphs) plus
fixed-size windows (512 tokens with a 128-token overlap).

See ADR-004 Section B.3.
"""

import hashlib
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger

from src.inference.index_manager import DocumentMetadata, IndexType

# ---------------------------------------------------------------------------
# Tokenizer (for token-based chunking)
# ---------------------------------------------------------------------------

_LOAD_FAILED = object()  # sentinel marking a failed load
_tokenizer = None  # None = not attempted yet, _LOAD_FAILED = attempt failed


def _get_tokenizer():
    """Lazy-load the transformers tokenizer.

    Falls back to a simple character-count approximation when the EXAONE
    tokenizer is unavailable. The sentinel is stored on failure so the load
    is not retried.
    """
    global _tokenizer
    if _tokenizer is _LOAD_FAILED:
        return None
    if _tokenizer is not None:
        return _tokenizer
    try:
        from transformers import AutoTokenizer

        _tokenizer = AutoTokenizer.from_pretrained(
            "LGAI-EXAONE/EXAONE-Deep-7.8B",
            trust_remote_code=True,
        )
        logger.info("EXAONE tokenizer loaded")
    except Exception:
        logger.warning("EXAONE tokenizer load failed; using character-based fallback")
        _tokenizer = _LOAD_FAILED
    return None if _tokenizer is _LOAD_FAILED else _tokenizer


def _count_tokens(text: str) -> int:
    """Return the token count of the text."""
    tok = _get_tokenizer()
    if tok is not None:
        return len(tok.encode(text, add_special_tokens=False))
    # Fallback: approximate roughly 2 characters per token for Korean text
    return max(1, len(text) // 2)


# ---------------------------------------------------------------------------
# Parsers (PDF / HWP / TXT)
# ---------------------------------------------------------------------------


def _parse_pdf_pages(file_path: str) -> List[Tuple[int, str]]:
    """Extract per-page text from a PDF using PyMuPDF."""
    try:
        import fitz  # PyMuPDF
    except ImportError as e:
        raise ImportError("PyMuPDF is not installed: pip install PyMuPDF") from e

    pages: List[Tuple[int, str]] = []
    with fitz.open(file_path) as doc:
        for page_number, page in enumerate(doc, start=1):
            text = page.get_text("text")
            if text.strip():
                pages.append((page_number, text))
    return pages


def _parse_pdf(file_path: str) -> str:
    """Extract the full text of a PDF using PyMuPDF."""
    pages = _parse_pdf_pages(file_path)
    return "\n\n".join(text for _, text in pages)


def _parse_hwp(file_path: str) -> str:
    """Extract text from an HWP file.

    Requires pyhwp or a compatible library. Since there is no stable HWP
    parser on PyPI, this raises ImportError at runtime with guidance.
    """
    try:
        import hwp
    except ImportError as e:
        raise ImportError(
            "No HWP parser is installed. Please install pyhwp or a compatible library."
        ) from e

    doc = hwp.open(file_path)
    try:
        paragraphs: List[str] = []
        for paragraph in doc.paragraphs:
            text = paragraph.text.strip()
            if text:
                paragraphs.append(text)
        return "\n\n".join(paragraphs)
    finally:
        if hasattr(doc, "close"):
            doc.close()


def _parse_txt(file_path: str) -> str:
    """Read a TXT file as UTF-8, falling back to cp949/euc-kr on failure."""
    path = Path(file_path)
    for encoding in ("utf-8", "cp949", "euc-kr"):
        try:
            return path.read_text(encoding=encoding)
        except (UnicodeDecodeError, LookupError):
            continue
    raise ValueError(f"Could not determine the text file encoding: {file_path}")


_PARSERS = {
    ".pdf": _parse_pdf,
    ".hwp": _parse_hwp,
    ".txt": _parse_txt,
}

_PAGE_PARSERS = {
    ".pdf": _parse_pdf_pages,
}
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ---------------------------------------------------------------------------
|
| 139 |
+
# ํ
์คํธ ์ ์
|
| 140 |
+
# ---------------------------------------------------------------------------
|
| 141 |
+
|
| 142 |
+
# ํ์ด์ง ๋ฒํธ, ๋จธ๋ฆฌ๊ธ/๋ฐ๋ฅ๊ธ ํจํด
|
| 143 |
+
_HEADER_FOOTER_RE = re.compile(
|
| 144 |
+
r"^[\s]*[-โโ]?\s*\d+\s*[-โโ]?\s*$", # ํ์ด์ง ๋ฒํธ๋ง ์๋ ์ค
|
| 145 |
+
re.MULTILINE,
|
| 146 |
+
)
|
| 147 |
+
_MULTI_NEWLINE_RE = re.compile(r"\n{3,}")
|
| 148 |
+
_MULTI_SPACE_RE = re.compile(r"[ \t]{2,}")
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _clean_text(text: str) -> str:
|
| 152 |
+
"""์ถ์ถ๋ ์์ ํ
์คํธ๋ฅผ ์ ์ ํ๋ค."""
|
| 153 |
+
text = _HEADER_FOOTER_RE.sub("", text)
|
| 154 |
+
text = _MULTI_NEWLINE_RE.sub("\n\n", text)
|
| 155 |
+
text = _MULTI_SPACE_RE.sub(" ", text)
|
| 156 |
+
return text.strip()
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# ---------------------------------------------------------------------------
|
| 160 |
+
# ์๋ฏธ ๋จ์ ๋ถํ
|
| 161 |
+
# ---------------------------------------------------------------------------
|
| 162 |
+
|
| 163 |
+
# ๋ฒ๋ น: ์ N์กฐ, ์ Nํญ, ์ Nํธ
|
| 164 |
+
_LAW_ARTICLE_RE = re.compile(r"(?=\n\s*์ \s*\d+\s*์กฐ(?:์\d+)?\s*[\(๏ผ])")
|
| 165 |
+
# ๋ฌธ๋จ ๋ถํ (๋น ์ค ๊ธฐ์ค)
|
| 166 |
+
_PARAGRAPH_RE = re.compile(r"\n\s*\n")
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _split_semantic(text: str, doc_type: IndexType) -> List[str]:
|
| 170 |
+
"""๋ฌธ์ ํ์
์ ๋ฐ๋ผ ์๋ฏธ ๋จ์๋ก ๋ถํ ํ๋ค.
|
| 171 |
+
|
| 172 |
+
- LAW: ์กฐ/ํญ ๋จ์
|
| 173 |
+
- MANUAL/NOTICE: ๋ฌธ๋จ(๋น ์ค) ๋จ์
|
| 174 |
+
- CASE: ๋ฌธ๋จ ๋จ์
|
| 175 |
+
"""
|
| 176 |
+
if doc_type == IndexType.LAW:
|
| 177 |
+
segments = _LAW_ARTICLE_RE.split(text)
|
| 178 |
+
else:
|
| 179 |
+
segments = _PARAGRAPH_RE.split(text)
|
| 180 |
+
|
| 181 |
+
return [s.strip() for s in segments if s.strip()]
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ---------------------------------------------------------------------------
|
| 185 |
+
# ๊ณ ์ ํฌ๊ธฐ ์ฒญํน (ํ ํฐ ๊ธฐ๋ฐ)
|
| 186 |
+
# ---------------------------------------------------------------------------
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _chunk_fixed(
|
| 190 |
+
text: str,
|
| 191 |
+
chunk_size: int = 512,
|
| 192 |
+
chunk_overlap: int = 128,
|
| 193 |
+
) -> List[str]:
|
| 194 |
+
"""ํ ํฐ ๊ธฐ๋ฐ ๊ณ ์ ํฌ๊ธฐ ์ฒญํน.
|
| 195 |
+
|
| 196 |
+
ํ ํฌ๋์ด์ ๊ฐ ๋ก๋๋ ๊ฒฝ์ฐ ์ ํํ ํ ํฐ ๋ถํ ,
|
| 197 |
+
๊ทธ๋ ์ง ์์ผ๋ฉด ๋ฌธ์ ๊ธฐ๋ฐ ๊ทผ์ฌ ๋ถํ ์ ์ํํ๋ค.
|
| 198 |
+
"""
|
| 199 |
+
# overlap์ด chunk_size ์ด์์ด๋ฉด ๋ณด์ (๋ฌดํ๋ฃจํ ๋ฐฉ์ง)
|
| 200 |
+
if chunk_overlap >= chunk_size:
|
| 201 |
+
chunk_overlap = chunk_size // 4
|
| 202 |
+
|
| 203 |
+
tok = _get_tokenizer()
|
| 204 |
+
|
| 205 |
+
if tok is not None:
|
| 206 |
+
token_ids = tok.encode(text, add_special_tokens=False)
|
| 207 |
+
if len(token_ids) <= chunk_size:
|
| 208 |
+
return [text]
|
| 209 |
+
|
| 210 |
+
chunks: List[str] = []
|
| 211 |
+
start = 0
|
| 212 |
+
step = max(1, chunk_size - chunk_overlap)
|
| 213 |
+
while start < len(token_ids):
|
| 214 |
+
end = min(start + chunk_size, len(token_ids))
|
| 215 |
+
chunk_text = tok.decode(token_ids[start:end], skip_special_tokens=True)
|
| 216 |
+
if chunk_text.strip():
|
| 217 |
+
chunks.append(chunk_text.strip())
|
| 218 |
+
if end >= len(token_ids):
|
| 219 |
+
break
|
| 220 |
+
start += step
|
| 221 |
+
return chunks
|
| 222 |
+
|
| 223 |
+
# ํด๋ฐฑ: ๋ฌธ์ ๊ธฐ๋ฐ ๊ทผ์ฌ (ํ๊ตญ์ด ~2์ โ 1ํ ํฐ)
|
| 224 |
+
char_size = chunk_size * 2
|
| 225 |
+
char_overlap = chunk_overlap * 2
|
| 226 |
+
if len(text) <= char_size:
|
| 227 |
+
return [text]
|
| 228 |
+
|
| 229 |
+
chunks = []
|
| 230 |
+
start = 0
|
| 231 |
+
step = max(1, char_size - char_overlap)
|
| 232 |
+
while start < len(text):
|
| 233 |
+
end = min(start + char_size, len(text))
|
| 234 |
+
chunk_text = text[start:end].strip()
|
| 235 |
+
if chunk_text:
|
| 236 |
+
chunks.append(chunk_text)
|
| 237 |
+
if end >= len(text):
|
| 238 |
+
break
|
| 239 |
+
start += step
|
| 240 |
+
return chunks
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ---------------------------------------------------------------------------
|
| 244 |
+
# ํ์ด๋ธ๋ฆฌ๋ ์ฒญํน
|
| 245 |
+
# ---------------------------------------------------------------------------
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def _hybrid_chunk(
|
| 249 |
+
text: str,
|
| 250 |
+
doc_type: IndexType,
|
| 251 |
+
chunk_size: int = 512,
|
| 252 |
+
chunk_overlap: int = 128,
|
| 253 |
+
min_chunk_tokens: int = 50,
|
| 254 |
+
) -> List[str]:
|
| 255 |
+
"""์๋ฏธ ๋จ์ + ๊ณ ์ ํฌ๊ธฐ ํ์ด๋ธ๋ฆฌ๋ ์ฒญํน.
|
| 256 |
+
|
| 257 |
+
1๋จ๊ณ: ์๋ฏธ ๋จ์ ๋ถํ (์กฐ/ํญ, ๋ฌธ๋จ)
|
| 258 |
+
2๋จ๊ณ: ํฐ ์ธ๊ทธ๋จผํธ๋ ๊ณ ์ ํฌ๊ธฐ๋ก ์ฌ๋ถํ
|
| 259 |
+
3๋จ๊ณ: ์์ ์ธ๊ทธ๋จผํธ๋ ์ธ์ ์ธ๊ทธ๋จผํธ์ ๋ณํฉ
|
| 260 |
+
"""
|
| 261 |
+
if not text.strip():
|
| 262 |
+
return []
|
| 263 |
+
|
| 264 |
+
segments = _split_semantic(text, doc_type)
|
| 265 |
+
|
| 266 |
+
if not segments:
|
| 267 |
+
return _chunk_fixed(text, chunk_size, chunk_overlap)
|
| 268 |
+
|
| 269 |
+
chunks: List[str] = []
|
| 270 |
+
buffer = ""
|
| 271 |
+
|
| 272 |
+
for segment in segments:
|
| 273 |
+
seg_tokens = _count_tokens(segment)
|
| 274 |
+
|
| 275 |
+
if seg_tokens > chunk_size:
|
| 276 |
+
# ๋ฒํผ์ ์์ธ ๊ฒ ๋จผ์ ์ฒ๋ฆฌ
|
| 277 |
+
if buffer.strip():
|
| 278 |
+
if _count_tokens(buffer) > chunk_size:
|
| 279 |
+
chunks.extend(_chunk_fixed(buffer, chunk_size, chunk_overlap))
|
| 280 |
+
else:
|
| 281 |
+
chunks.append(buffer.strip())
|
| 282 |
+
buffer = ""
|
| 283 |
+
# ํฐ ์ธ๊ทธ๋จผํธ๋ ๊ณ ์ ํฌ๊ธฐ๋ก ๋ถํ
|
| 284 |
+
chunks.extend(_chunk_fixed(segment, chunk_size, chunk_overlap))
|
| 285 |
+
elif _count_tokens(buffer + "\n\n" + segment if buffer else segment) > chunk_size:
|
| 286 |
+
# ๋ฒํผ + ํ์ฌ ์ธ๊ทธ๋จผํธ๊ฐ chunk_size๋ฅผ ์ด๊ณผํ๋ฉด ๋ฒํผ flush
|
| 287 |
+
if buffer.strip():
|
| 288 |
+
chunks.append(buffer.strip())
|
| 289 |
+
buffer = segment
|
| 290 |
+
else:
|
| 291 |
+
# ๋ฒํผ์ ์ถ๊ฐ
|
| 292 |
+
buffer = buffer + "\n\n" + segment if buffer else segment
|
| 293 |
+
|
| 294 |
+
# ๋จ์ ๋ฒํผ ์ฒ๋ฆฌ
|
| 295 |
+
if buffer.strip():
|
| 296 |
+
if _count_tokens(buffer) > chunk_size:
|
| 297 |
+
chunks.extend(_chunk_fixed(buffer, chunk_size, chunk_overlap))
|
| 298 |
+
else:
|
| 299 |
+
chunks.append(buffer.strip())
|
| 300 |
+
|
| 301 |
+
# ์ต์ ํ ํฐ ๋ฏธ๋ง ์ฒญํฌ ๋ณํฉ
|
| 302 |
+
merged: List[str] = []
|
| 303 |
+
for chunk in chunks:
|
| 304 |
+
if merged and _count_tokens(chunk) < min_chunk_tokens:
|
| 305 |
+
candidate = merged[-1] + "\n\n" + chunk
|
| 306 |
+
if _count_tokens(candidate) <= chunk_size:
|
| 307 |
+
merged[-1] = candidate
|
| 308 |
+
continue
|
| 309 |
+
merged.append(chunk)
|
| 310 |
+
|
| 311 |
+
return merged if merged else []
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
# ---------------------------------------------------------------------------
|
| 315 |
+
# BatchResult
|
| 316 |
+
# ---------------------------------------------------------------------------
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
@dataclass
|
| 320 |
+
class BatchResult:
|
| 321 |
+
"""process_batch ๋ฐํ ํ์
. ์ฑ๊ณต/์คํจ ์ ๋ณด๋ฅผ ๋ชจ๋ ํฌํจํ๋ค."""
|
| 322 |
+
|
| 323 |
+
succeeded: List[DocumentMetadata] = field(default_factory=list)
|
| 324 |
+
failed: List[Tuple[str, str]] = field(default_factory=list) # [(file_path, error)]
|
| 325 |
+
|
| 326 |
+
@property
|
| 327 |
+
def total_chunks(self) -> int:
|
| 328 |
+
return len(self.succeeded)
|
| 329 |
+
|
| 330 |
+
@property
|
| 331 |
+
def success_count(self) -> int:
|
| 332 |
+
return self.total_chunks - len(self.failed) if not self.failed else self._count_files()
|
| 333 |
+
|
| 334 |
+
def _count_files(self) -> int:
|
| 335 |
+
seen = set()
|
| 336 |
+
for m in self.succeeded:
|
| 337 |
+
seen.add(m.extras.get("file_path", ""))
|
| 338 |
+
return len(seen)
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
# ---------------------------------------------------------------------------
|
| 342 |
+
# DocumentProcessor
|
| 343 |
+
# ---------------------------------------------------------------------------
|
| 344 |
+
|
| 345 |
+
# ๋ฌธ์ ํ์
๋ณ ๊ธฐ๋ณธ ์ ๋ขฐ๋ (ADR-004 Table)
|
| 346 |
+
_DEFAULT_RELIABILITY: Dict[IndexType, float] = {
|
| 347 |
+
IndexType.CASE: 0.6,
|
| 348 |
+
IndexType.LAW: 1.0,
|
| 349 |
+
IndexType.MANUAL: 0.9,
|
| 350 |
+
IndexType.NOTICE: 0.7,
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
class DocumentProcessor:
|
| 355 |
+
"""๋คํ์ ๋ฌธ์๋ฅผ ํ์ฑํ๊ณ ์ฒญํฌ ๋ถํ ํ์ฌ DocumentMetadata ๋ฆฌ์คํธ๋ฅผ ๋ฐํํ๋ค.
|
| 356 |
+
|
| 357 |
+
Parameters
|
| 358 |
+
----------
|
| 359 |
+
chunk_size : int
|
| 360 |
+
์ฒญํฌ๋น ์ต๋ ํ ํฐ ์ (๊ธฐ๋ณธ 512).
|
| 361 |
+
chunk_overlap : int
|
| 362 |
+
์ฒญํฌ ๊ฐ ์ค๋ฒ๋ฉ ํ ํฐ ์ (๊ธฐ๋ณธ 128, ADR-004).
|
| 363 |
+
min_chunk_tokens : int
|
| 364 |
+
์ต์ ์ฒญํฌ ํฌ๊ธฐ. ์ด๋ณด๋ค ์์ผ๋ฉด ์ธ์ ์ฒญํฌ์ ๋ณํฉ (๊ธฐ๋ณธ 50).
|
| 365 |
+
"""
|
| 366 |
+
|
| 367 |
+
SUPPORTED_EXTENSIONS = frozenset(_PARSERS.keys())
|
| 368 |
+
|
| 369 |
+
def __init__(
|
| 370 |
+
self,
|
| 371 |
+
chunk_size: int = 512,
|
| 372 |
+
chunk_overlap: int = 128,
|
| 373 |
+
min_chunk_tokens: int = 50,
|
| 374 |
+
) -> None:
|
| 375 |
+
self.chunk_size = chunk_size
|
| 376 |
+
self.chunk_overlap = chunk_overlap
|
| 377 |
+
self.min_chunk_tokens = min_chunk_tokens
|
| 378 |
+
|
| 379 |
+
# ------------------------------------------------------------------
|
| 380 |
+
# Public API
|
| 381 |
+
# ------------------------------------------------------------------
|
| 382 |
+
|
| 383 |
+
def process(
|
| 384 |
+
self,
|
| 385 |
+
file_path: str,
|
| 386 |
+
doc_type: IndexType,
|
| 387 |
+
*,
|
| 388 |
+
source: str = "",
|
| 389 |
+
title: Optional[str] = None,
|
| 390 |
+
category: str = "",
|
| 391 |
+
reliability_score: Optional[float] = None,
|
| 392 |
+
valid_from: Optional[str] = None,
|
| 393 |
+
valid_until: Optional[str] = None,
|
| 394 |
+
extras: Optional[Dict[str, Any]] = None,
|
| 395 |
+
document_id: Optional[str] = None,
|
| 396 |
+
) -> List[DocumentMetadata]:
|
| 397 |
+
"""ํ์ผ์ ํ์ฑ โ ์ ์ โ ์ฒญํนํ์ฌ DocumentMetadata ๋ฆฌ์คํธ๋ฅผ ๋ฐํํ๋ค.
|
| 398 |
+
|
| 399 |
+
Parameters
|
| 400 |
+
----------
|
| 401 |
+
file_path : str
|
| 402 |
+
ํ์ฑํ ์๋ณธ ๋ฌธ์ ๊ฒฝ๋ก.
|
| 403 |
+
doc_type : IndexType
|
| 404 |
+
๋ฌธ์์ semantic type.
|
| 405 |
+
document_id : Optional[str]
|
| 406 |
+
์๋ณธ ๋ฌธ์ ๋จ์์ ์์ ID. ์ง์ ๋๋ฉด ์์ฑ๋๋ ๋ชจ๋ chunk๊ฐ ๊ฐ์ doc_id๋ฅผ ๊ณต์ ํ๋ค.
|
| 407 |
+
|
| 408 |
+
Returns
|
| 409 |
+
-------
|
| 410 |
+
List[DocumentMetadata]
|
| 411 |
+
์ฒญํฌ๋ณ ๋ฉํ๋ฐ์ดํฐ ๋ฆฌ์คํธ. doc_id๋ ์๋ณธ ๋ฌธ์ ๋จ์๋ก ๋์ผํ๋ฉฐ,
|
| 412 |
+
์ฒญํฌ๋ chunk_index๋ก ๊ตฌ๋ถํ๋ค.
|
| 413 |
+
"""
|
| 414 |
+
path = Path(file_path)
|
| 415 |
+
ext = path.suffix.lower()
|
| 416 |
+
|
| 417 |
+
if ext not in _PARSERS:
|
| 418 |
+
raise ValueError(
|
| 419 |
+
f"์ง์ํ์ง ์๋ ํ์ผ ํ์: {ext} "
|
| 420 |
+
f"(์ง์: {', '.join(sorted(self.SUPPORTED_EXTENSIONS))})"
|
| 421 |
+
)
|
| 422 |
+
|
| 423 |
+
logger.info(f"๋ฌธ์ ํ์ฑ ์์: {file_path} (type={doc_type.value})")
|
| 424 |
+
|
| 425 |
+
units: List[Tuple[Optional[int], str]] = []
|
| 426 |
+
page_parser = _PAGE_PARSERS.get(ext)
|
| 427 |
+
if page_parser is not None:
|
| 428 |
+
for page_number, page_text in page_parser(file_path):
|
| 429 |
+
cleaned_page = _clean_text(page_text)
|
| 430 |
+
if cleaned_page:
|
| 431 |
+
units.append((page_number, cleaned_page))
|
| 432 |
+
else:
|
| 433 |
+
raw_text = _PARSERS[ext](file_path)
|
| 434 |
+
if not raw_text.strip():
|
| 435 |
+
logger.warning(f"๋น ๋ฌธ์: {file_path}")
|
| 436 |
+
return []
|
| 437 |
+
|
| 438 |
+
cleaned = _clean_text(raw_text)
|
| 439 |
+
if not cleaned:
|
| 440 |
+
logger.warning(f"์ ์ ํ ๋น ๋ฌธ์: {file_path}")
|
| 441 |
+
return []
|
| 442 |
+
units.append((None, cleaned))
|
| 443 |
+
|
| 444 |
+
if not units:
|
| 445 |
+
logger.warning(f"์ ์ ํ ๋น ๋ฌธ์: {file_path}")
|
| 446 |
+
return []
|
| 447 |
+
|
| 448 |
+
chunk_entries: List[Tuple[str, Optional[int]]] = []
|
| 449 |
+
for page_number, cleaned_text in units:
|
| 450 |
+
chunks = _hybrid_chunk(
|
| 451 |
+
cleaned_text,
|
| 452 |
+
doc_type,
|
| 453 |
+
chunk_size=self.chunk_size,
|
| 454 |
+
chunk_overlap=self.chunk_overlap,
|
| 455 |
+
min_chunk_tokens=self.min_chunk_tokens,
|
| 456 |
+
)
|
| 457 |
+
for chunk in chunks:
|
| 458 |
+
chunk_entries.append((chunk, page_number))
|
| 459 |
+
|
| 460 |
+
if not chunk_entries:
|
| 461 |
+
logger.warning(f"์ฒญํน ๊ฒฐ๊ณผ ์์: {file_path}")
|
| 462 |
+
return []
|
| 463 |
+
|
| 464 |
+
logger.info(f"์ฒญํน ์๋ฃ: {len(chunk_entries)}๊ฐ ์ฒญํฌ ์์ฑ ({file_path})")
|
| 465 |
+
|
| 466 |
+
# 4. ๋ฉํ๋ฐ์ดํฐ ์์ฑ
|
| 467 |
+
now_iso = datetime.now(timezone.utc).isoformat()
|
| 468 |
+
doc_title = title or path.stem
|
| 469 |
+
score = (
|
| 470 |
+
reliability_score
|
| 471 |
+
if reliability_score is not None
|
| 472 |
+
else _DEFAULT_RELIABILITY.get(doc_type, 0.5)
|
| 473 |
+
)
|
| 474 |
+
# doc_id: ์๋ณธ ๋ฌธ์ ๋จ์ ์์ ID (๋ชจ๋ ์ฒญํฌ๊ฐ ๋์ผ)
|
| 475 |
+
doc_id = (
|
| 476 |
+
document_id or hashlib.sha256(f"{file_path}:{doc_type.value}".encode()).hexdigest()[:12]
|
| 477 |
+
)
|
| 478 |
+
|
| 479 |
+
results: List[DocumentMetadata] = []
|
| 480 |
+
for idx, (chunk, page_number) in enumerate(chunk_entries):
|
| 481 |
+
chunk_extras = dict(extras or {})
|
| 482 |
+
chunk_extras.update(
|
| 483 |
+
{
|
| 484 |
+
"chunk_text": chunk,
|
| 485 |
+
"file_path": str(path),
|
| 486 |
+
"file_extension": ext,
|
| 487 |
+
"chunk_id": f"{doc_id}:{idx}",
|
| 488 |
+
}
|
| 489 |
+
)
|
| 490 |
+
if page_number is not None:
|
| 491 |
+
chunk_extras["page"] = page_number
|
| 492 |
+
|
| 493 |
+
meta = DocumentMetadata(
|
| 494 |
+
doc_id=doc_id,
|
| 495 |
+
doc_type=doc_type.value,
|
| 496 |
+
source=source,
|
| 497 |
+
title=doc_title,
|
| 498 |
+
category=category,
|
| 499 |
+
reliability_score=score,
|
| 500 |
+
created_at=now_iso,
|
| 501 |
+
updated_at=now_iso,
|
| 502 |
+
valid_from=valid_from,
|
| 503 |
+
valid_until=valid_until,
|
| 504 |
+
chunk_index=idx,
|
| 505 |
+
chunk_total=len(chunk_entries),
|
| 506 |
+
extras=chunk_extras,
|
| 507 |
+
)
|
| 508 |
+
results.append(meta)
|
| 509 |
+
|
| 510 |
+
return results
|
| 511 |
+
|
| 512 |
+
def process_batch(
|
| 513 |
+
self,
|
| 514 |
+
file_paths: List[str],
|
| 515 |
+
doc_type: IndexType,
|
| 516 |
+
**kwargs: Any,
|
| 517 |
+
) -> BatchResult:
|
| 518 |
+
"""์ฌ๋ฌ ํ์ผ์ ์ผ๊ด ์ฒ๋ฆฌํ๋ค.
|
| 519 |
+
|
| 520 |
+
Returns
|
| 521 |
+
-------
|
| 522 |
+
BatchResult
|
| 523 |
+
์ฑ๊ณตํ ์ฒญํฌ ๋ฆฌ์คํธ์ ์คํจํ ํ์ผ ์ ๋ณด๋ฅผ ๋ชจ๋ ํฌํจ.
|
| 524 |
+
"""
|
| 525 |
+
result = BatchResult()
|
| 526 |
+
for fp in file_paths:
|
| 527 |
+
try:
|
| 528 |
+
chunks = self.process(fp, doc_type, **kwargs)
|
| 529 |
+
result.succeeded.extend(chunks)
|
| 530 |
+
except Exception as e:
|
| 531 |
+
logger.error(f"๋ฌธ์ ์ฒ๋ฆฌ ์คํจ: {fp} โ {e}")
|
| 532 |
+
result.failed.append((fp, str(e)))
|
| 533 |
+
logger.info(
|
| 534 |
+
f"๋ฐฐ์น ์ฒ๋ฆฌ ์๋ฃ: {len(file_paths)}๊ฐ ํ์ผ โ "
|
| 535 |
+
f"{result.total_chunks}๊ฐ ์ฒญํฌ, {len(result.failed)}๊ฐ ์คํจ"
|
| 536 |
+
)
|
| 537 |
+
return result
|
| 538 |
+
|
| 539 |
+
def parse_only(self, file_path: str) -> str:
|
| 540 |
+
"""ํ์ฑ + ์ ์ ๋ง ์ํํ๊ณ ํ
์คํธ๋ฅผ ๋ฐํํ๋ค (์ฒญํน ์์)."""
|
| 541 |
+
ext = Path(file_path).suffix.lower()
|
| 542 |
+
if ext not in _PARSERS:
|
| 543 |
+
raise ValueError(f"์ง์ํ์ง ์๋ ํ์ผ ํ์: {ext}")
|
| 544 |
+
raw = _PARSERS[ext](file_path)
|
| 545 |
+
return _clean_text(raw)
|
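A minimal usage sketch of the processor above. The file path, `source` value, and the `IndexType.LAW` member are illustrative assumptions, not values taken from this module:

# Hypothetical usage sketch for DocumentProcessor (path and source are assumed).
from src.inference.document_processor import DocumentProcessor
from src.inference.index_manager import IndexType

processor = DocumentProcessor(chunk_size=512, chunk_overlap=128)
# Every chunk shares one doc_id; PDF page numbers land in extras["page"].
chunks = processor.process("data/sample_act.pdf", IndexType.LAW, source="moleg")
for meta in chunks:
    print(meta.extras["chunk_id"], meta.chunk_index, "/", meta.chunk_total)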
src/inference/feature_flags.py
ADDED
@@ -0,0 +1,58 @@
"""Feature flag management module.

Supports environment-variable based feature flags plus per-request
overrides via the X-Feature-Flag header.
"""

import os
from dataclasses import asdict, dataclass
from typing import Optional

from loguru import logger


@dataclass(frozen=True)
class FeatureFlags:
    """Runtime feature-flag settings."""

    use_rag_pipeline: bool = True
    model_version: str = "v2_lora"  # v1_lora | v2_lora

    @classmethod
    def from_env(cls) -> "FeatureFlags":
        """Load feature flags from environment variables."""
        flags = cls(
            use_rag_pipeline=os.getenv("USE_RAG_PIPELINE", "true").lower() in ("true", "1", "yes"),
            model_version=os.getenv("MODEL_VERSION", "v2_lora"),
        )
        logger.info(f"Feature flags loaded: {flags}")
        return flags

    def override_from_header(self, header_value: Optional[str]) -> "FeatureFlags":
        """Runtime override from the X-Feature-Flag header.

        Format: 'USE_RAG_PIPELINE=false,MODEL_VERSION=v1_lora'
        The original instance is left unchanged; a new instance is returned.
        """
        if not header_value:
            return self

        overrides: dict = {}
        for pair in header_value.split(","):
            pair = pair.strip()
            if "=" not in pair:
                continue
            key, value = pair.split("=", 1)
            key = key.strip().upper()
            value = value.strip()

            if key == "USE_RAG_PIPELINE":
                overrides["use_rag_pipeline"] = value.lower() in ("true", "1", "yes")
            elif key == "MODEL_VERSION":
                if value in ("v1_lora", "v2_lora"):
                    overrides["model_version"] = value

        if overrides:
            current = asdict(self)
            current.update(overrides)
            return FeatureFlags(**current)
        return self
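A sketch of the override flow above, using only the two keys the module defines; the header string is an illustrative example:

from src.inference.feature_flags import FeatureFlags

base = FeatureFlags.from_env()
# Per-request override; unknown keys and invalid model versions are ignored.
scoped = base.override_from_header("USE_RAG_PIPELINE=false,MODEL_VERSION=v1_lora")
assert base.use_rag_pipeline is True   # frozen original is unchanged
assert scoped.use_rag_pipeline is False
assert scoped.model_version == "v1_lora"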
src/inference/graph/__init__.py
ADDED
@@ -0,0 +1,30 @@
"""GovOn LangGraph runtime package.

Issue #415: LangGraph runtime foundation and planner/executor adapter setup.

Main public API:
- `build_govon_graph`: StateGraph builder function
- `GovOnGraphState`: graph state TypedDict
- `ApprovalStatus`, `TaskType`, `ToolPlan`: state-related types
- `PlannerAdapter`, `LLMPlannerAdapter`: planner abstraction (LLMPlannerAdapter is the default)
- `RegexPlannerAdapter`: CI fallback planner (for SKIP_MODEL_LOAD=true environments only)
- `ExecutorAdapter`, `RegistryExecutorAdapter`: executor abstraction
"""

from .builder import build_govon_graph
from .executor_adapter import ExecutorAdapter, RegistryExecutorAdapter
from .planner_adapter import LLMPlannerAdapter, PlannerAdapter, RegexPlannerAdapter
from .state import ApprovalStatus, GovOnGraphState, TaskType, ToolPlan

__all__ = [
    "build_govon_graph",
    "GovOnGraphState",
    "ApprovalStatus",
    "TaskType",
    "ToolPlan",
    "PlannerAdapter",
    "RegexPlannerAdapter",
    "LLMPlannerAdapter",
    "ExecutorAdapter",
    "RegistryExecutorAdapter",
]
src/inference/graph/builder.py
ADDED
@@ -0,0 +1,142 @@
"""GovOn LangGraph StateGraph builder.

Issue #415: LangGraph runtime foundation and planner/executor adapter setup.

`build_govon_graph()` assembles the 6-node StateGraph and returns the
compiled graph.

Graph topology:
  START -> session_load -> planner -> approval_wait
        -> [approved] tool_execute -> synthesis -> persist -> END
        -> [rejected] persist -> END
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

from langgraph.graph import END, START, StateGraph
from langgraph.utils.runnable import RunnableCallable

from .executor_adapter import ExecutorAdapter
from .nodes import (
    approval_wait_node,
    persist_node,
    planner_node,
    session_load_node,
    synthesis_node,
    tool_execute_node,
)
from .planner_adapter import PlannerAdapter
from .state import ApprovalStatus, GovOnGraphState

if TYPE_CHECKING:
    from src.inference.session_context import SessionStore


def route_after_approval(state: GovOnGraphState) -> str:
    """Branch condition after approval_wait.

    Decides the next node based on the `approval_status` value.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state.

    Returns
    -------
    str
        "tool_execute" (approved) or "persist" (rejected).
    """
    if state.get("approval_status") == ApprovalStatus.APPROVED.value:
        return "tool_execute"
    return "persist"


def build_govon_graph(
    planner_adapter: PlannerAdapter,
    executor_adapter: ExecutorAdapter,
    session_store: "SessionStore",
    checkpointer: Optional[object] = None,
) -> object:
    """Assemble and compile the GovOn MVP StateGraph.

    Wires up the six nodes and connects the approval gate via a
    conditional edge. Uses `MemorySaver` when checkpointer is None.

    Parameters
    ----------
    planner_adapter : PlannerAdapter
        Planner adapter instance.
        Production uses `LLMPlannerAdapter`.
        CI environments fall back to `RegexPlannerAdapter`.
    executor_adapter : ExecutorAdapter
        Tool executor adapter instance.
    session_store : SessionStore
        GovOn session store, used by the session_load and persist nodes.
    checkpointer : optional
        LangGraph checkpoint store.
        When None, MemorySaver is used (in-memory only; lost on restart).
        Production injects `AsyncSqliteSaver`.

    Returns
    -------
    CompiledGraph
        The compiled LangGraph; supports `ainvoke()`, `aget_state()`, etc.
    """
    from langgraph.checkpoint.memory import MemorySaver

    graph = StateGraph(GovOnGraphState)

    # --- Register nodes (adapters and session_store injected via closures) ---

    async def _session_load(state: GovOnGraphState) -> dict:
        return await session_load_node(state, session_store=session_store)

    async def _planner(state: GovOnGraphState) -> dict:
        return await planner_node(state, planner_adapter=planner_adapter)

    async def _tool_execute(state: GovOnGraphState) -> dict:
        return await tool_execute_node(state, executor_adapter=executor_adapter)

    async def _synthesis(state: GovOnGraphState) -> dict:
        return await synthesis_node(state)

    async def _persist(state: GovOnGraphState) -> dict:
        return await persist_node(state, session_store=session_store)

    graph.add_node("session_load", _session_load)
    graph.add_node("planner", _planner)
    # Preserve sync execution for interrupt() on Python 3.10.
    # The default add_node(sync_fn) path auto-generates an async executor wrapper,
    # which breaks LangGraph interrupt context under ainvoke().
    graph.add_node(
        "approval_wait",
        RunnableCallable(approval_wait_node, name="approval_wait"),
    )
    graph.add_node("tool_execute", _tool_execute)
    graph.add_node("synthesis", _synthesis)
    graph.add_node("persist", _persist)

    # --- Edges ---
    graph.add_edge(START, "session_load")
    graph.add_edge("session_load", "planner")
    graph.add_edge("planner", "approval_wait")
    graph.add_conditional_edges(
        "approval_wait",
        route_after_approval,
        {
            "tool_execute": "tool_execute",
            "persist": "persist",
        },
    )
    graph.add_edge("tool_execute", "synthesis")
    graph.add_edge("synthesis", "persist")
    graph.add_edge("persist", END)

    # --- Compile ---
    saver = checkpointer if checkpointer is not None else MemorySaver()
    compiled = graph.compile(checkpointer=saver)

    return compiled
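A hypothetical wiring sketch for the builder above. The zero-argument adapter constructors, the `session_store` setup, and the `user_query` state key are assumptions; only `build_govon_graph` and the thread_id requirement come from this file:

# Hypothetical wiring sketch — adapter constructors and the state key are assumed.
from src.inference.graph import (
    RegexPlannerAdapter,
    RegistryExecutorAdapter,
    build_govon_graph,
)

async def run_once(session_store):
    graph = build_govon_graph(
        planner_adapter=RegexPlannerAdapter(),
        executor_adapter=RegistryExecutorAdapter(),
        session_store=session_store,
    )  # compiled with a MemorySaver checkpointer by default
    # A thread_id is required because the graph is compiled with a checkpointer.
    config = {"configurable": {"thread_id": "demo-session"}}
    state = await graph.ainvoke({"user_query": "road repair complaint"}, config=config)
    return state.get("approval_status")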
src/inference/graph/capabilities/__init__.py
ADDED
@@ -0,0 +1,35 @@
"""LangGraph capabilities package — standardized tool capability interface."""

from .api_lookup import ApiLookupCapability, ApiLookupParams
from .append_evidence import AppendEvidenceCapability
from .base import CapabilityBase, CapabilityMetadata, LookupResult
from .defaults import get_all_defaults, get_max_retries, get_timeout
from .draft_civil_response import DraftCivilResponseCapability
from .rag_search import RagSearchCapability, RagSearchParams
from .registry import (
    MVP_CAPABILITY_IDS,
    build_mvp_registry,
    get_all_metadata,
    get_mvp_capability_ids,
    is_mvp_capability,
)

__all__ = [
    "CapabilityBase",
    "CapabilityMetadata",
    "LookupResult",
    "ApiLookupCapability",
    "ApiLookupParams",
    "RagSearchCapability",
    "RagSearchParams",
    "DraftCivilResponseCapability",
    "AppendEvidenceCapability",
    "MVP_CAPABILITY_IDS",
    "build_mvp_registry",
    "get_all_metadata",
    "get_mvp_capability_ids",
    "is_mvp_capability",
    "get_timeout",
    "get_max_retries",
    "get_all_defaults",
]
src/inference/graph/capabilities/api_lookup.py
ADDED
@@ -0,0 +1,268 @@
"""Unified api_lookup capability — wraps MinwonAnalysisAction."""

from __future__ import annotations

import asyncio
from dataclasses import dataclass
from typing import Any, Dict, Optional

from loguru import logger

from .base import CapabilityBase, CapabilityMetadata, EvidenceEnvelope, EvidenceItem, LookupResult
from .defaults import get_timeout

try:
    import httpx

    _HTTPX_AVAILABLE = True
except ImportError:
    httpx = None  # type: ignore
    _HTTPX_AVAILABLE = False


# ---------------------------------------------------------------------------
# Parameter validator
# ---------------------------------------------------------------------------


@dataclass
class ApiLookupParams:
    """api_lookup call parameters — extracted from the context, normalized, validated."""

    query: str
    ret_count: int = 5
    min_score: int = 2

    @classmethod
    def from_context(cls, query: str, context: Dict[str, Any]) -> "ApiLookupParams":
        """Extract parameters from the context and normalize aliases."""

        def _first_not_none(*values, default):
            for v in values:
                if v is not None:
                    return v
            return default

        ret_count = int(
            _first_not_none(
                context.get("api_lookup_count"),
                context.get("ret_count"),
                context.get("count"),
                default=5,
            )
        )
        min_score = int(
            _first_not_none(
                context.get("api_lookup_min_score"),
                context.get("min_score"),
                context.get("score_threshold"),
                default=2,
            )
        )
        return cls(
            query=query.strip(),
            ret_count=max(1, min(20, ret_count)),
            min_score=max(0, min(10, min_score)),
        )

    def validate(self) -> Optional[str]:
        """Return an error message on validation failure, None on success."""
        if not self.query:
            return "query is empty"
        if len(self.query) > 500:
            return f"query is too long ({len(self.query)} chars, max 500)"
        return None


# ---------------------------------------------------------------------------
# Capability
# ---------------------------------------------------------------------------


class ApiLookupCapability(CapabilityBase):
    """Wraps the data.go.kr civil-complaint analysis auxiliary API as a LangGraph capability.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Existing Action instance to wrap. When None, empty results are
        returned (supports lightweight environments such as SKIP_MODEL_LOAD).
    """

    def __init__(self, action: Optional[Any] = None) -> None:
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="api_lookup",
            description="Calls the data.go.kr civil-complaint analysis auxiliary API to search for similar complaint cases.",
            approval_summary="Looks up similar complaint cases from the external API (data.go.kr).",
            provider="data.go.kr",
            timeout_sec=get_timeout("api_lookup"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Wrap MinwonAnalysisAction.fetch_similar_cases and return a LookupResult."""
        provider = self.metadata.provider

        # Extract and validate parameters (always runs, with or without an action)
        params = ApiLookupParams.from_context(query, context)
        validation_error = params.validate()
        if validation_error:
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=validation_error,
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[validation_error],
                ),
            )

        # Empty result when no action is available (lightweight environment)
        if self._action is None:
            logger.debug("[api_lookup] action is None — returning an empty result")
            return LookupResult(
                success=True,
                query=params.query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        # Pass parameters as arguments (thread-safe; no shared-state mutation)
        try:
            payload = await asyncio.wait_for(
                self._action.fetch_similar_cases(
                    params.query,
                    context,
                    ret_count=params.ret_count,
                    min_score=params.min_score,
                ),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            timeout_msg = f"API call timed out (exceeded {self.metadata.timeout_sec}s)"
            logger.warning(f"[api_lookup] timeout (exceeded {self.metadata.timeout_sec}s)")
            return LookupResult(
                success=False,
                query=params.query,
                provider=provider,
                error=timeout_msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[timeout_msg],
                ),
            )
        except Exception as exc:
            if _HTTPX_AVAILABLE and isinstance(exc, httpx.HTTPError):
                logger.warning(f"[api_lookup] httpx error: {exc}")
            else:
                logger.error(f"[api_lookup] API call error: {exc}", exc_info=True)
            return LookupResult(
                success=False,
                query=params.query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[str(exc)],
                ),
            )

        # Convert the result
        results = payload.get("results")
        if results is None:
            error_msg = "The civil-complaint analysis API call failed."
            return LookupResult(
                success=False,
                query=payload.get("query", params.query),
                provider=provider,
                error=error_msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[error_msg],
                ),
            )

        # Normalize citations into a list of dicts
        raw_citations = payload.get("citations", [])
        citations: list[Dict[str, Any]] = []
        for c in raw_citations:
            if isinstance(c, dict):
                citations.append(c)
            elif hasattr(c, "__dict__"):
                citations.append({k: v for k, v in c.__dict__.items() if not k.startswith("_")})

        if not results:
            return LookupResult(
                success=True,
                query=payload.get("query", params.query),
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        # Normalize into EvidenceItem
        evidence_items = []
        for item in results:
            if not isinstance(item, dict):
                continue
            title = item.get("title") or item.get("qnaTitle") or item.get("question", "")
            excerpt = item.get("content") or item.get("qnaContent") or item.get("qnaAnswer", "")
            link = item.get("url") or item.get("detailUrl", "")
            evidence_items.append(
                EvidenceItem(
                    source_type="api",
                    title=str(title),
                    excerpt=str(excerpt)[:500],
                    link_or_path=str(link),
                    score=float(item.get("score", 0)),
                    provider_meta={"provider": provider},
                )
            )
        # Convert citations into EvidenceItems too (dedup on link_or_path)
        seen_links: set[str] = {item.link_or_path for item in evidence_items}
        for c in citations:
            link = c.get("url") or c.get("detailUrl", "")
            if link in seen_links:
                continue
            seen_links.add(str(link))
            title = c.get("title") or c.get("qnaTitle") or c.get("question", "")
            excerpt = c.get("content") or c.get("qnaContent") or c.get("qnaAnswer", "")
            evidence_items.append(
                EvidenceItem(
                    source_type="api",
                    title=str(title),
                    excerpt=str(excerpt)[:500],
                    link_or_path=str(link),
                    score=float(c.get("score", 0)),
                    provider_meta={"provider": provider},
                )
            )

        envelope = EvidenceEnvelope(
            items=evidence_items,
            status="ok" if evidence_items else "empty",
        )

        return LookupResult(
            success=True,
            query=payload.get("query", params.query),
            results=results,
            context_text=payload.get("context_text", ""),
            citations=citations,
            provider=provider,
            evidence=envelope,
        )
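A small illustrative check of the alias normalization and clamping defined in ApiLookupParams above; the query string and context values are made up:

from src.inference.graph.capabilities.api_lookup import ApiLookupParams

# "count" is the lowest-priority alias for ret_count; values are clamped
# to [1, 20] for ret_count and [0, 10] for min_score.
params = ApiLookupParams.from_context(
    "  parking fine appeal ", {"count": 50, "score_threshold": -3}
)
assert params.query == "parking fine appeal"
assert params.ret_count == 20  # clamped down from 50
assert params.min_score == 0   # clamped up from -3
assert params.validate() is None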
src/inference/graph/capabilities/append_evidence.py
ADDED
@@ -0,0 +1,133 @@
"""append_evidence capability — wraps the legacy closure as a CapabilityBase."""

from __future__ import annotations

from typing import Any, Callable, Dict

from .base import CapabilityBase, CapabilityMetadata, EvidenceEnvelope, EvidenceItem, LookupResult
from .defaults import get_timeout


class AppendEvidenceCapability(CapabilityBase):
    """Evidence/citation supplementation capability.

    Takes the existing _append_evidence_tool closure from api_server via
    injection and wraps it in the CapabilityBase interface.

    Parameters
    ----------
    execute_fn : Callable
        Execution function with an ``async (query, context, session) -> dict`` signature.
    """

    def __init__(self, execute_fn: Callable[..., Any]) -> None:
        self._execute_fn = execute_fn

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="append_evidence",
            description=(
                "Supplements an existing answer with additional sources such as "
                "statutory grounds, similar cases, and external statistics."
            ),
            approval_summary="Adds legal grounds and sources to the existing answer.",
            provider="local_vectordb+data.go.kr",
            timeout_sec=get_timeout("append_evidence"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Delegate to the injected function and convert the result to a LookupResult."""
        raw = await self._execute_fn(query=query, context=context, session=session)

        if isinstance(raw, dict) and raw.get("error"):
            return LookupResult(
                success=False,
                query=query,
                provider=self.metadata.provider,
                error=raw["error"],
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[raw["error"]],
                ),
            )

        text = raw.get("text", "") if isinstance(raw, dict) else str(raw)
        citations = raw.get("api_citations", []) if isinstance(raw, dict) else []
        rag_results = raw.get("rag_results", []) if isinstance(raw, dict) else []

        # Aggregate evidence from the earlier stages into an EvidenceEnvelope
        evidence_items: list[EvidenceItem] = []
        errors: list[str] = []

        # rag_results -> EvidenceItem conversion
        for item in rag_results:
            if not isinstance(item, dict):
                continue
            metadata = item.get("metadata", {}) or {}
            evidence_items.append(
                EvidenceItem(
                    source_type="rag",
                    title=item.get("title", ""),
                    excerpt=str(item.get("content", ""))[:500],
                    link_or_path=metadata.get("file_path", ""),
                    page=metadata.get("page"),
                    score=float(item.get("score", 0.0)),
                    provider_meta={"provider": "local_vectordb"},
                )
            )

        # api_citations -> EvidenceItem conversion
        for c in citations:
            if not isinstance(c, dict):
                continue
            title = c.get("title") or c.get("qnaTitle") or c.get("question", "")
            excerpt = c.get("content") or c.get("qnaContent") or c.get("qnaAnswer", "")
            link = c.get("url") or c.get("detailUrl", "")
            evidence_items.append(
                EvidenceItem(
                    source_type="api",
                    title=str(title),
                    excerpt=str(excerpt)[:500],
                    link_or_path=str(link),
                    score=float(c.get("score", 0)),
                    provider_meta={"provider": "data.go.kr"},
                )
            )

        if isinstance(raw, dict):
            raw_errors = raw.get("errors", [])
            if isinstance(raw_errors, list):
                errors = [str(e) for e in raw_errors]

        status: str
        if not evidence_items and errors:
            status = "error"
        elif not evidence_items:
            status = "empty"
        elif errors:
            status = "partial"
        else:
            status = "ok"

        envelope = EvidenceEnvelope(
            items=evidence_items,
            summary_text=text,
            status=status,
            errors=errors,
        )

        return LookupResult(
            success=True,
            query=query,
            context_text=text,
            citations=citations,
            results=rag_results,
            provider=self.metadata.provider,
            evidence=envelope,
        )
src/inference/graph/capabilities/base.py
ADDED
@@ -0,0 +1,129 @@
"""Common abstractions for LangGraph capabilities."""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class EvidenceItem:
    """Represents evidence in one shape regardless of RAG/API origin.

    source_type: "rag" | "api" | "llm_generated"
    """

    source_type: str  # "rag" | "api" | "llm_generated"
    title: str
    excerpt: str  # body excerpt (max 500 chars)
    link_or_path: str = ""  # URL (API) or file_path (RAG)
    page: Optional[int] = None
    score: float = 0.0
    provider_meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "source_type": self.source_type,
            "title": self.title,
            "excerpt": self.excerpt,
            "link_or_path": self.link_or_path,
            "page": self.page,
            "score": self.score,
            "provider_meta": self.provider_meta,
        }


@dataclass
class EvidenceEnvelope:
    """Container for mixed evidence results."""

    items: List[EvidenceItem] = field(default_factory=list)
    summary_text: str = ""
    status: str = "ok"  # "ok" | "empty" | "partial" | "error"
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "items": [item.to_dict() for item in self.items],
            "summary_text": self.summary_text,
            "status": self.status,
            "errors": self.errors,
        }


@dataclass
class CapabilityMetadata:
    """Capability metadata shared by the planner, executor, and session trace."""

    name: str  # tool registry key (e.g., "api_lookup")
    description: str  # description the LLM planner reads (1-2 sentences)
    approval_summary: str  # summary shown in the approval_wait prompt
    provider: str  # data provider identifier (e.g., "data.go.kr")
    timeout_sec: float = 10.0  # default timeout


@dataclass
class LookupResult:
    """Common response schema for api_lookup."""

    success: bool
    query: str
    results: List[Dict[str, Any]] = field(default_factory=list)
    context_text: str = ""
    citations: List[Dict[str, Any]] = field(default_factory=list)
    provider: str = ""
    error: Optional[str] = None
    empty_reason: Optional[str] = None  # "quota", "no_match", "provider_error"
    latency_ms: float = 0.0
    evidence: Optional[EvidenceEnvelope] = None  # normalized evidence (optional)

    def to_dict(self) -> Dict[str, Any]:
        d: Dict[str, Any] = {
            "success": self.success,
            "query": self.query,
            "count": len(self.results),
            "results": self.results,
            "context_text": self.context_text,
            "citations": self.citations,
            "provider": self.provider,
            "error": self.error,
            "empty_reason": self.empty_reason,
            "latency_ms": round(self.latency_ms, 2),
        }
        if self.evidence is not None:
            d["evidence"] = self.evidence.to_dict()
        return d


class CapabilityBase(ABC):
    """Abstract base for LangGraph tool capabilities.

    An async-callable interface registrable in the RegistryExecutorAdapter
    tool_registry.
    """

    @property
    @abstractmethod
    def metadata(self) -> CapabilityMetadata: ...

    @abstractmethod
    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,  # SessionContext (avoids a circular import)
    ) -> LookupResult: ...

    async def __call__(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> Dict[str, Any]:
        """Entry point compatible with RegistryExecutorAdapter."""
        import time

        start = time.monotonic()
        result = await self.execute(query, context, session)
        result.latency_ms = (time.monotonic() - start) * 1000
        return result.to_dict()
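A minimal sketch of what the base class above expects from a subclass. The "echo" capability is hypothetical, written only to illustrate the contract; `__call__` supplies latency stamping and dict conversion for free:

import asyncio
from typing import Any, Dict

from src.inference.graph.capabilities.base import (
    CapabilityBase,
    CapabilityMetadata,
    EvidenceEnvelope,
    LookupResult,
)


class EchoCapability(CapabilityBase):
    """Hypothetical no-op capability used only to illustrate the contract."""

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="echo",
            description="Echoes the query back (illustration only).",
            approval_summary="Echoes the query.",
            provider="local",
        )

    async def execute(self, query: str, context: Dict[str, Any], session: Any) -> LookupResult:
        return LookupResult(
            success=True, query=query, context_text=query,
            evidence=EvidenceEnvelope(status="ok"),
        )


# __call__ wraps execute(), stamps latency_ms, and returns a plain dict:
result = asyncio.run(EchoCapability()("hello", {}, None))
assert result["success"] and "latency_ms" in result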
src/inference/graph/capabilities/defaults.py
ADDED
@@ -0,0 +1,94 @@
"""Capability timeout/retry defaults module.

Issue #163: centralized management of per-capability timeout and retry defaults.
Overridable via the GOVON_TOOL_TIMEOUT_{CAPABILITY_NAME} environment variable.

Example: GOVON_TOOL_TIMEOUT_RAG_SEARCH=20 -> changes the rag_search timeout to 20s
"""

from __future__ import annotations

import os
from dataclasses import dataclass
from typing import Dict

from loguru import logger


@dataclass(frozen=True)
class CapabilityDefaults:
    """Per-capability timeout/retry defaults."""

    timeout_sec: float
    max_retries: int


# -----------------------------------------------------------------------
# Default definitions (extracted from capability metadata across the codebase)
# -----------------------------------------------------------------------

_DEFAULTS: Dict[str, CapabilityDefaults] = {
    "rag_search": CapabilityDefaults(timeout_sec=15.0, max_retries=0),
    "api_lookup": CapabilityDefaults(timeout_sec=10.0, max_retries=1),
    "draft_civil_response": CapabilityDefaults(timeout_sec=30.0, max_retries=0),
    "append_evidence": CapabilityDefaults(timeout_sec=30.0, max_retries=0),
    "issue_detector": CapabilityDefaults(timeout_sec=15.0, max_retries=0),
    "stats_lookup": CapabilityDefaults(timeout_sec=15.0, max_retries=0),
    "keyword_analyzer": CapabilityDefaults(timeout_sec=10.0, max_retries=0),
    "demographics_lookup": CapabilityDefaults(timeout_sec=15.0, max_retries=0),
}


def get_timeout(capability_name: str) -> float:
    """Return the timeout (seconds) for a capability.

    Uses the ``GOVON_TOOL_TIMEOUT_{CAPABILITY_NAME_UPPER}`` environment
    variable when set; otherwise returns the default.

    Parameters
    ----------
    capability_name : str
        Capability name (e.g., "rag_search").

    Returns
    -------
    float
        Timeout in seconds. Unknown capabilities get 10.0s.
    """
    env_key = f"GOVON_TOOL_TIMEOUT_{capability_name.upper()}"
    env_val = os.environ.get(env_key)
    if env_val is not None:
        try:
            val = float(env_val)
            if val > 0:
                return val
            logger.warning(
                f"GOVON_TOOL_TIMEOUT_{capability_name.upper()} value is not positive: {env_val}"
            )
        except ValueError:
            logger.warning(f"{env_key} value cannot be parsed as a number: {env_val!r}")

    defaults = _DEFAULTS.get(capability_name)
    return defaults.timeout_sec if defaults else 10.0


def get_max_retries(capability_name: str) -> int:
    """Return the maximum retry count for a capability.

    Parameters
    ----------
    capability_name : str
        Capability name.

    Returns
    -------
    int
        Maximum retry count. Unknown capabilities get 0.
    """
    defaults = _DEFAULTS.get(capability_name)
    return defaults.max_retries if defaults else 0


def get_all_defaults() -> Dict[str, CapabilityDefaults]:
    """Return the defaults for all registered capabilities."""
    return dict(_DEFAULTS)
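The environment override described in the docstring above, exercised directly against the module's own names and registered defaults:

import os

from src.inference.graph.capabilities.defaults import get_max_retries, get_timeout

assert get_timeout("rag_search") == 15.0    # registered default
assert get_timeout("unknown_tool") == 10.0  # fallback for unregistered names

os.environ["GOVON_TOOL_TIMEOUT_RAG_SEARCH"] = "20"
assert get_timeout("rag_search") == 20.0    # env var wins

os.environ["GOVON_TOOL_TIMEOUT_RAG_SEARCH"] = "-5"
assert get_timeout("rag_search") == 15.0    # non-positive values are ignored

del os.environ["GOVON_TOOL_TIMEOUT_RAG_SEARCH"]
assert get_max_retries("api_lookup") == 1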
src/inference/graph/capabilities/demographics_lookup.py
ADDED
@@ -0,0 +1,283 @@
"""demographics_lookup capability - gender + age + population-ratio combination.

Issue #489: civil-complaint demographic-statistics analysis tool.
Combines three APIs (gender stats, age stats, population ratio) to provide
a demographic analysis of civil complaints.
"""

from __future__ import annotations

import asyncio
from typing import Any, Dict, List, Optional

from loguru import logger

from .base import (
    CapabilityBase,
    CapabilityMetadata,
    EvidenceEnvelope,
    EvidenceItem,
    LookupResult,
)
from .defaults import get_timeout


class DemographicsLookupCapability(CapabilityBase):
    """Civil-complaint demographic-analysis capability.

    Combines gender, age, and population-ratio figures into a demographic
    analysis result.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Action instance used for API calls. If None, an empty result is returned.
    """

    def __init__(self, action: Optional[Any] = None) -> None:
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="demographics_lookup",
            description=(
                "Combines gender, age, and population-ratio statistics to "
                "provide a demographic analysis of civil complaints."
            ),
            approval_summary="Analyzes civil-complaint demographics from the public data portal.",
            provider="data.go.kr",
            timeout_sec=get_timeout("demographics_lookup"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Call the gender, age, and population-ratio APIs in parallel and combine the results."""
        provider = self.metadata.provider

        if not query or not query.strip():
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error="query is empty",
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=["query is empty"]),
            )

        if self._action is None:
            logger.debug("[demographics_lookup] action is None - returning empty result")
            return LookupResult(
                success=True,
                query=query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        date_from = context.get("date_from", "")
        date_to = context.get("date_to", "")
        searchword = context.get("searchword", "")
        top_n = int(context.get("top_n", 5))

        if not searchword:
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error="demographic analysis requires a searchword",
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=["demographic analysis requires a searchword"],
                ),
            )

        try:
            gender, age, population = await asyncio.wait_for(
                self._fetch_all(date_from, date_to, searchword, top_n),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            msg = f"API call timed out (exceeded {self.metadata.timeout_sec}s)"
            logger.warning(f"[demographics_lookup] {msg}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[msg]),
            )
        except Exception as exc:
            logger.error(f"[demographics_lookup] API call error: {exc}", exc_info=True)
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[str(exc)]),
            )

        all_results: List[Dict[str, Any]] = []
        evidence_items: List[EvidenceItem] = []
        errors: List[str] = []

        if gender is not None:
            for item in gender:
                item["_source_api"] = "gender"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("label", ""),
                        excerpt=f"gender: {item.get('label', '')}, count={item.get('hits', 0)}",
                        provider_meta={"provider": provider, "api": "gender"},
                    )
                )
        else:
            errors.append("gender-stats API failed")

        if age is not None:
            for item in age:
                item["_source_api"] = "age"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=f"{item.get('label', '')}s",
                        excerpt=f"age: {item.get('label', '')}s, count={item.get('hits', 0)}",
                        provider_meta={"provider": provider, "api": "age"},
                    )
                )
        else:
            errors.append("age-stats API failed")

        if population is not None:
            for item in population:
                item["_source_api"] = "population"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("label", ""),
                        excerpt=f"population ratio: {item.get('label', '')}, "
                        f"ratio={item.get('ratio', '')}",
                        provider_meta={"provider": provider, "api": "population"},
                    )
                )
        else:
            errors.append("population-ratio API failed")

        if not all_results:
            status = "error" if len(errors) == 3 else "empty"
            return LookupResult(
                success=len(errors) < 3,
                query=query,
                provider=provider,
                empty_reason="no_match" if len(errors) < 3 else "provider_error",
                error="; ".join(errors) if errors else None,
                evidence=EvidenceEnvelope(items=[], status=status, errors=errors),
            )

        context_text = self._build_context_text(gender, age, population)
        status = "ok" if not errors else "partial"

        return LookupResult(
            success=True,
            query=query,
            results=all_results,
            context_text=context_text,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=evidence_items,
                summary_text=context_text,
                status=status,
                errors=errors,
            ),
        )

    async def _fetch_all(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        top_n: int,
    ) -> tuple:
        """Call the three APIs in parallel."""
        tasks = [
            self._safe_call(
                self._action.get_gender_stats,
                date_from=date_from,
                date_to=date_to,
                searchword=searchword,
            ),
            self._safe_call(
                self._action.get_age_stats,
                date_from=date_from,
                date_to=date_to,
                searchword=searchword,
            ),
            self._safe_call(
                self._action.get_population_ratio,
                date_from=date_from,
                date_to=date_to,
                top_n=top_n,
            ),
        ]
        return tuple(await asyncio.gather(*tasks))

    @staticmethod
    async def _safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
        """Wrap an individual API call so a failure returns None instead of raising."""
        try:
            return await fn(**kwargs)
        except Exception as exc:
            logger.warning(f"[demographics_lookup] individual API call failed: {exc}")
            return None

    @staticmethod
    def _build_context_text(
        gender: Optional[List],
        age: Optional[List],
        population: Optional[List],
    ) -> str:
        """Build a natural-language summary from the combined results."""
        parts: List[str] = []

        if gender:
            total = sum(int(g.get("hits", 0)) for g in gender)
            if total > 0:
                items = []
                for g in gender:
                    label = g.get("label", "")
                    hits = int(g.get("hits", 0))
                    pct = (hits / total * 100) if total else 0
                    items.append(f"{label} {pct:.1f}%")
                parts.append(", ".join(items))

        if age:
            # Age band with the highest count
            sorted_age = sorted(age, key=lambda x: int(x.get("hits", 0)), reverse=True)
            if sorted_age:
                top = sorted_age[0]
                total = sum(int(a.get("hits", 0)) for a in age)
                hits = int(top.get("hits", 0))
                pct = (hits / total * 100) if total else 0
                parts.append(f"{top.get('label', '')}s most frequent ({pct:.1f}%)")

        if population:
            if len(population) > 0:
                top = population[0]
                ratio = top.get("ratio", "")
                label = top.get("label", "")
                try:
                    ratio_pct = float(ratio) * 100 if ratio else 0
                    parts.append(f"{label} population ratio {ratio_pct:.2f}%")
                except (ValueError, TypeError):
                    parts.append(f"{label} population ratio {ratio}")

        return ", ".join(parts) if parts else ""
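The _fetch_all/_safe_call pairing above is the degradation pattern these analysis capabilities share: each provider call is wrapped so a failure yields None instead of cancelling its siblings, and the caller downgrades status to "partial". A self-contained sketch of the same idea, with hypothetical stub coroutines standing in for the data.go.kr calls:

import asyncio
from typing import Any, Dict, List, Optional


async def safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
    """Mirror of _safe_call: return the result, or None if the call raises."""
    try:
        return await fn(**kwargs)
    except Exception:
        return None


# Hypothetical stand-ins for get_gender_stats / get_age_stats / get_population_ratio.
async def ok_api(**_: Any) -> List[Dict[str, Any]]:
    return [{"label": "female", "hits": 7}]


async def failing_api(**_: Any) -> List[Dict[str, Any]]:
    raise RuntimeError("provider down")


async def main() -> None:
    gender, age, population = await asyncio.gather(
        safe_call(ok_api),
        safe_call(failing_api),  # the failure becomes None, not an exception
        safe_call(ok_api),
    )
    # Partial success: age is None, so the capability records "age-stats API
    # failed" and returns status="partial" instead of aborting the lookup.
    print(gender, age, population)


asyncio.run(main())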
src/inference/graph/capabilities/draft_civil_response.py
ADDED
@@ -0,0 +1,96 @@
"""draft_civil_response capability - wraps the existing closure as a CapabilityBase."""

from __future__ import annotations

from typing import Any, Callable, Dict

from .base import CapabilityBase, CapabilityMetadata, EvidenceEnvelope, EvidenceItem, LookupResult
from .defaults import get_timeout


class DraftCivilResponseCapability(CapabilityBase):
    """Civil-complaint draft-response capability.

    Receives the existing _draft_civil_response_tool closure from api_server
    by injection and wraps it in the CapabilityBase interface.

    Parameters
    ----------
    execute_fn : Callable
        Execution function with an ``async (query, context, session) -> dict`` signature.
    """

    def __init__(self, execute_fn: Callable[..., Any]) -> None:
        self._execute_fn = execute_fn

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="draft_civil_response",
            description=(
                "Drafts a civil-complaint response by synthesizing retrieved "
                "statutes/cases with external complaint-analysis results."
            ),
            approval_summary="The AI model synthesizes search results into a draft civil-complaint response.",
            provider="local_llm",
            timeout_sec=get_timeout("draft_civil_response"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Delegate to the injected function and convert the result to a LookupResult."""
        raw = await self._execute_fn(query=query, context=context, session=session)

        if isinstance(raw, dict) and raw.get("error"):
            return LookupResult(
                success=False,
                query=query,
                provider=self.metadata.provider,
                error=raw["error"],
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[raw["error"]],
                ),
            )

        text = raw.get("text", "") if isinstance(raw, dict) else str(raw)

        # Convert cases referenced by the draft into EvidenceItems
        evidence_items: list[EvidenceItem] = []
        if isinstance(raw, dict):
            # Convert citations/references included in raw into EvidenceItems
            for ref in raw.get("citations", []):
                if not isinstance(ref, dict):
                    continue
                title = ref.get("title") or ref.get("qnaTitle") or ref.get("question", "")
                excerpt = ref.get("content") or ref.get("qnaContent") or ref.get("qnaAnswer", "")
                link = ref.get("url") or ref.get("detailUrl", "")
                evidence_items.append(
                    EvidenceItem(
                        source_type="llm_generated",
                        title=str(title),
                        excerpt=str(excerpt)[:500],
                        link_or_path=str(link),
                        provider_meta={"provider": self.metadata.provider},
                    )
                )

        envelope = EvidenceEnvelope(
            items=evidence_items,
            summary_text=text,
            status="ok",
        )

        return LookupResult(
            success=True,
            query=query,
            context_text=text,
            provider=self.metadata.provider,
            # The draft result goes in context_text rather than results
            results=[raw] if isinstance(raw, dict) else [],
            evidence=envelope,
        )
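A minimal sketch of the injection seam this wrapper creates: any async (query, context, session) -> dict callable can stand in for the real api_server closure, which keeps the capability testable in isolation. The stub below is hypothetical (it is not the actual _draft_civil_response_tool), and the result attributes are assumed from the LookupResult/EvidenceEnvelope usage in this diff:

import asyncio
from typing import Any, Dict

from src.inference.graph.capabilities.draft_civil_response import (
    DraftCivilResponseCapability,
)


# Hypothetical stub with the expected signature; the real closure lives in api_server.
async def fake_draft_fn(query: str, context: Dict[str, Any], session: Any) -> dict:
    return {
        "text": "Draft response ...",
        "citations": [{"title": "Case #12", "content": "...", "url": "https://example.org"}],
    }


async def main() -> None:
    cap = DraftCivilResponseCapability(execute_fn=fake_draft_fn)
    result = await cap.execute(query="noise complaint", context={}, session=None)
    print(result.success, result.context_text, len(result.evidence.items))


asyncio.run(main())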
src/inference/graph/capabilities/issue_detector.py
ADDED
@@ -0,0 +1,292 @@
"""issue_detector capability - rising keywords + today's issues + top keywords.

Issue #486: civil-complaint issue-detection tool.
Combines three APIs (rising keywords, today's issues, top keywords) to
detect the current major issues and produce a natural-language summary.
"""

from __future__ import annotations

import asyncio
from typing import Any, Dict, List, Optional

from loguru import logger

from .base import (
    CapabilityBase,
    CapabilityMetadata,
    EvidenceEnvelope,
    EvidenceItem,
    LookupResult,
)
from .defaults import get_timeout


class IssueDetectorCapability(CapabilityBase):
    """Civil-complaint issue-detection capability.

    Combines rising keywords, today's issue topics, and the most frequent
    keywords to identify the current major complaint issues.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Action instance used for API calls. If None, an empty result is returned.
    """

    def __init__(self, action: Optional[Any] = None) -> None:
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="issue_detector",
            description=(
                "Combines rising keywords, today's issues, and top keywords "
                "to detect the current major civil-complaint issues."
            ),
            approval_summary="Queries the civil-complaint issue landscape from the public data portal.",
            provider="data.go.kr",
            timeout_sec=get_timeout("issue_detector"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Call the three APIs in parallel and combine the results."""
        provider = self.metadata.provider

        if not query or not query.strip():
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error="query is empty",
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=["query is empty"]),
            )

        if self._action is None:
            logger.debug("[issue_detector] action is None - returning empty result")
            return LookupResult(
                success=True,
                query=query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        # Extract and validate the date parameters
        analysis_time = context.get("analysis_time", "")
        search_date = context.get("search_date", "")
        max_result = int(context.get("max_result", 5))

        if not analysis_time and not search_date:
            err = "an analysis_time or search_date parameter is required"
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=err,
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=[err]),
            )

        try:
            rising, topics, top_kw = await asyncio.wait_for(
                self._fetch_all(analysis_time, search_date, max_result),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            msg = f"API call timed out (exceeded {self.metadata.timeout_sec}s)"
            logger.warning(f"[issue_detector] {msg}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[msg]),
            )
        except Exception as exc:
            logger.error(f"[issue_detector] API call error: {exc}", exc_info=True)
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[str(exc)]),
            )

        # Combine the results
        all_results: List[Dict[str, Any]] = []
        evidence_items: List[EvidenceItem] = []
        errors: List[str] = []

        if rising is not None:
            for item in rising:
                item["_source_api"] = "rising_keyword"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("keyword", ""),
                        excerpt=f"rising keyword: {item.get('keyword', '')}, "
                        f"frequency={item.get('df', 0)}, day-over-day={item.get('prevRatio', '')}%",
                        provider_meta={"provider": provider, "api": "rising_keyword"},
                    )
                )
        else:
            errors.append("rising-keyword API failed")

        if topics is not None:
            for item in topics:
                item["_source_api"] = "today_topic"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("topic", ""),
                        excerpt=f"today's issue: {item.get('topic', '')}, "
                        f"count={item.get('count', 0)}",
                        provider_meta={"provider": provider, "api": "today_topic"},
                    )
                )
        else:
            errors.append("today's-issues API failed")

        if top_kw is not None:
            for item in top_kw:
                item["_source_api"] = "top_keyword"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("term", ""),
                        excerpt=f"top keyword: {item.get('term', '')}, frequency={item.get('df', 0)}",
                        provider_meta={"provider": provider, "api": "top_keyword"},
                    )
                )
        else:
            errors.append("top-keyword API failed")

        if not all_results:
            status = "error" if len(errors) == 3 else "empty"
            return LookupResult(
                success=len(errors) < 3,
                query=query,
                provider=provider,
                empty_reason="no_match" if len(errors) < 3 else "provider_error",
                error="; ".join(errors) if errors else None,
                evidence=EvidenceEnvelope(items=[], status=status, errors=errors),
            )

        context_text = self._build_context_text(rising, topics, top_kw)
        status = "ok" if not errors else "partial"

        return LookupResult(
            success=True,
            query=query,
            results=all_results,
            context_text=context_text,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=evidence_items,
                summary_text=context_text,
                status=status,
                errors=errors,
            ),
        )

    async def _fetch_all(
        self,
        analysis_time: str,
        search_date: str,
        max_result: int,
    ) -> tuple:
        """Call the three APIs in parallel."""
        tasks = [
            (
                self._safe_call(
                    self._action.get_rising_keywords,
                    analysis_time=analysis_time,
                    max_result=max_result,
                )
                if analysis_time
                else self._noop()
            ),
            (
                self._safe_call(
                    self._action.get_today_topics,
                    search_date=search_date,
                    top_n=max_result,
                )
                if search_date
                else self._noop()
            ),
            (
                self._safe_call(
                    self._action.get_top_keywords_by_period,
                    analysis_time=analysis_time or search_date,
                    max_result=max_result,
                )
                if (analysis_time or search_date)
                else self._noop()
            ),
        ]
        return await asyncio.gather(*tasks)

    @staticmethod
    async def _noop() -> None:
        """No-op coroutine that returns an empty result."""
        return None

    @staticmethod
    async def _safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
        """Wrap an individual API call so a failure returns None instead of raising."""
        try:
            return await fn(**kwargs)
        except Exception as exc:
            logger.warning(f"[issue_detector] individual API call failed: {exc}")
            return None

    @staticmethod
    def _build_context_text(
        rising: Optional[List],
        topics: Optional[List],
        top_kw: Optional[List],
    ) -> str:
        """Build a natural-language summary from the combined results."""
        parts: List[str] = []

        if rising:
            items = []
            for r in rising[:3]:
                kw = r.get("keyword", "")
                ratio = r.get("prevRatio", "")
                items.append(f"{kw}(+{ratio}%)" if ratio else kw)
            if items:
                parts.append(f"rising issues: {', '.join(items)}")

        if topics:
            items = []
            for t in topics[:3]:
                topic = t.get("topic", "")
                count = t.get("count", 0)
                items.append(f"{topic}({count:,} cases)")
            if items:
                parts.append(f"today's top topics: {', '.join(items)}")

        if top_kw:
            items = []
            for k in top_kw[:3]:
                term = k.get("term", "")
                df = k.get("df", 0)
                items.append(f"{term}({df} cases)")
            if items:
                parts.append(f"most frequent: {', '.join(items)}")

        return "; ".join(parts) if parts else ""
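One design point in _fetch_all above deserves a note: when a date parameter is missing, the corresponding slot is filled with _noop() rather than dropped, so asyncio.gather always yields exactly three values in a fixed order and the unpacking rising, topics, top_kw = ... stays valid. A small stdlib-only sketch of that positional-alignment trick (inputs hypothetical):

import asyncio
from typing import List


async def noop() -> None:
    """Placeholder coroutine that keeps a gather slot occupied (mirrors _noop)."""
    return None


async def fetch_rising() -> List[str]:
    return ["parking", "noise"]


async def fetch_today() -> List[str]:
    return ["road repair"]


async def main() -> None:
    analysis_time, search_date = "2024060112", ""  # hypothetical inputs
    rising, topics, top_kw = await asyncio.gather(
        fetch_rising() if analysis_time else noop(),
        fetch_today() if search_date else noop(),  # empty search_date -> slot stays None
        fetch_rising() if (analysis_time or search_date) else noop(),
    )
    print(rising, topics, top_kw)  # ['parking', 'noise'] None ['parking', 'noise']


asyncio.run(main())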
src/inference/graph/capabilities/keyword_analyzer.py
ADDED
@@ -0,0 +1,261 @@
"""keyword_analyzer capability - core keywords + related terms.

Issue #488: civil-complaint keyword-analysis tool.
Combines two APIs (core keywords, related terms) to provide keyword
analysis results.
"""

from __future__ import annotations

import asyncio
from typing import Any, Dict, List, Optional

from loguru import logger

from .base import (
    CapabilityBase,
    CapabilityMetadata,
    EvidenceEnvelope,
    EvidenceItem,
    LookupResult,
)
from .defaults import get_timeout


class KeywordAnalyzerCapability(CapabilityBase):
    """Civil-complaint keyword-analysis capability.

    Combines core keywords and related terms into a keyword analysis result.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Action instance used for API calls. If None, an empty result is returned.
    """

    def __init__(self, action: Optional[Any] = None) -> None:
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="keyword_analyzer",
            description=(
                "Combines core keywords and related terms to provide a "
                "keyword analysis of civil complaints."
            ),
            approval_summary="Analyzes civil-complaint keywords from the public data portal.",
            provider="data.go.kr",
            timeout_sec=get_timeout("keyword_analyzer"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Call the core-keyword and related-term APIs in parallel and combine the results."""
        provider = self.metadata.provider

        if not query or not query.strip():
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error="query is empty",
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=["query is empty"]),
            )

        if self._action is None:
            logger.debug("[keyword_analyzer] action is None - returning empty result")
            return LookupResult(
                success=True,
                query=query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        date_from = context.get("date_from", "")
        date_to = context.get("date_to", "")
        searchword = context.get("searchword", "")
        result_count = int(context.get("result_count", 5))

        try:
            core_kw, related = await asyncio.wait_for(
                self._fetch_all(date_from, date_to, searchword, result_count),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            msg = f"API call timed out (exceeded {self.metadata.timeout_sec}s)"
            logger.warning(f"[keyword_analyzer] {msg}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[msg]),
            )
        except Exception as exc:
            logger.error(f"[keyword_analyzer] API call error: {exc}", exc_info=True)
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[str(exc)]),
            )

        all_results: List[Dict[str, Any]] = []
        evidence_items: List[EvidenceItem] = []
        errors: List[str] = []

        if core_kw is not None:
            for item in core_kw:
                item["_source_api"] = "core_keyword"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("label", ""),
                        excerpt=f"core keyword: {item.get('label', '')}, "
                        f"score={item.get('value', 0)}",
                        provider_meta={"provider": provider, "api": "core_keyword"},
                    )
                )
        else:
            errors.append("core-keyword API failed")

        if related is not None:
            for item in related:
                item["_source_api"] = "related_word"
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("label", ""),
                        excerpt=f"related term: {item.get('label', '')}, score={item.get('value', 0)}",
                        provider_meta={"provider": provider, "api": "related_word"},
                    )
                )
        else:
            if searchword:
                errors.append("related-term API failed")

        if not all_results:
            status = "error" if errors else "empty"
            return LookupResult(
                success=not errors,
                query=query,
                provider=provider,
                empty_reason="no_match" if not errors else "provider_error",
                error="; ".join(errors) if errors else None,
                evidence=EvidenceEnvelope(items=[], status=status, errors=errors),
            )

        context_text = self._build_context_text(core_kw, related)
        status = "ok" if not errors else "partial"

        return LookupResult(
            success=True,
            query=query,
            results=all_results,
            context_text=context_text,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=evidence_items,
                summary_text=context_text,
                status=status,
                errors=errors,
            ),
        )

    async def _fetch_all(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        result_count: int,
    ) -> tuple:
        """Call the core-keyword and related-term APIs in parallel."""
        tasks = []

        # Core keywords are always fetched when date_from/date_to are present
        if date_from and date_to:
            tasks.append(
                self._safe_call(
                    self._action.get_core_keywords,
                    date_from=date_from,
                    date_to=date_to,
                    result_count=result_count,
                )
            )
        else:
            tasks.append(self._noop())

        # Related terms are fetched only when a searchword is present
        if date_from and date_to and searchword:
            tasks.append(
                self._safe_call(
                    self._action.get_related_words,
                    date_from=date_from,
                    date_to=date_to,
                    searchword=searchword,
                    result_count=result_count,
                )
            )
        else:
            tasks.append(self._noop())

        return tuple(await asyncio.gather(*tasks))

    @staticmethod
    async def _safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
        """Wrap an individual API call so a failure returns None instead of raising."""
        try:
            return await fn(**kwargs)
        except Exception as exc:
            logger.warning(f"[keyword_analyzer] individual API call failed: {exc}")
            return None

    @staticmethod
    async def _noop() -> None:
        """No-op coroutine that returns an empty result."""
        return None

    @staticmethod
    def _build_context_text(
        core_kw: Optional[List],
        related: Optional[List],
    ) -> str:
        """Build a natural-language summary from the combined results."""
        parts: List[str] = []

        if core_kw:
            items = []
            for k in core_kw[:5]:
                label = k.get("label", "")
                value = k.get("value", 0)
                try:
                    value_f = float(value)
                    items.append(f"{label}({value_f:,.0f} cases)")
                except (ValueError, TypeError):
                    items.append(f"{label}({value})")
            if items:
                parts.append(f"core keywords: {', '.join(items)}")

        if related:
            items = []
            for r in related[:5]:
                label = r.get("label", "")
                value = r.get("value", 0)
                try:
                    value_f = float(value)
                    items.append(f"{label}({value_f:,.1f} pts)")
                except (ValueError, TypeError):
                    items.append(f"{label}({value})")
            if items:
                parts.append(f"related terms: {', '.join(items)}")

        return ", ".join(parts) if parts else ""
src/inference/graph/capabilities/rag_search.py
ADDED
@@ -0,0 +1,287 @@
"""rag_search capability - standardizes local-document hybrid search as a LangGraph capability.

Issue #395: standardize local RAG search as a LangGraph tool capability.

Following the ApiLookupCapability pattern, implements parameter validation,
timeouts, result normalization, and fallback policies (empty/low-confidence).
"""

from __future__ import annotations

import asyncio
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional

from loguru import logger

from src.inference.index_manager import IndexType

from .base import CapabilityBase, CapabilityMetadata, EvidenceEnvelope, EvidenceItem, LookupResult
from .defaults import get_timeout

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

LOW_CONFIDENCE_THRESHOLD: float = 0.3
"""Default low-confidence threshold. If every result scores below this value,
the result set is classified as low_confidence."""

_VALID_SOURCE_TYPES: frozenset[str] = frozenset(t.value for t in IndexType)
_DEFAULT_SOURCE_TYPES: list[str] = [t.value for t in IndexType]


# ---------------------------------------------------------------------------
# Parameter validator
# ---------------------------------------------------------------------------


@dataclass
class RagSearchParams:
    """rag_search call parameters - extracted, normalized, and validated from context."""

    query: str
    top_k: int = 5
    source_types: List[str] = field(default_factory=lambda: list(_DEFAULT_SOURCE_TYPES))
    min_confidence: float = LOW_CONFIDENCE_THRESHOLD

    @classmethod
    def from_context(
        cls,
        query: str,
        context: Dict[str, Any],
        default_min_confidence: float = LOW_CONFIDENCE_THRESHOLD,
    ) -> "RagSearchParams":
        """Extract parameters from context and normalize their aliases."""
        top_k = int(context.get("rag_top_k") or context.get("top_k") or context.get("count") or 5)

        raw_filters = context.get("filters") or context.get("source_types")
        if isinstance(raw_filters, list):
            source_types = [str(f).lower() for f in raw_filters]
        else:
            source_types = list(_DEFAULT_SOURCE_TYPES)

        raw = context.get("rag_min_confidence")
        if raw is None:
            raw = context.get("min_confidence")
        if raw is None:
            raw = context.get("score_threshold")
        if raw is None:
            raw = default_min_confidence
        min_confidence = float(raw)

        return cls(
            query=query.strip(),
            top_k=max(1, min(50, top_k)),
            source_types=source_types,
            min_confidence=max(0.0, min(1.0, min_confidence)),
        )

    def validate(self) -> Optional[str]:
        """Return an error message on validation failure, None on success."""
        if not self.query:
            return "query is empty"
        if len(self.query) > 2000:
            return f"query is too long ({len(self.query)} chars, max 2000)"
        invalid = [t for t in self.source_types if t not in _VALID_SOURCE_TYPES]
        if invalid:
            return f"invalid source_type: {invalid}"
        return None


# ---------------------------------------------------------------------------
# Result normalization helper
# ---------------------------------------------------------------------------


def _normalize_result(raw: Dict[str, Any]) -> Dict[str, Any]:
    """Backfill the common schema fields on a raw SearchResult dict.

    Added fields: excerpt, file_path, page, score, source_type, doc_id, title.
    Existing fields are kept as-is.
    """
    content = raw.get("content", "")
    metadata = raw.get("metadata", {})
    result = dict(raw)
    result["excerpt"] = content[:500] if content else ""
    result["file_path"] = metadata.get("file_path", "")
    result["page"] = metadata.get("page", raw.get("chunk_index", 0))
    result["score"] = raw.get("score", 0.0)
    result["source_type"] = raw.get("source_type", "")
    result["doc_id"] = raw.get("doc_id", "")
    result["title"] = raw.get("title", "")
    return result


# ---------------------------------------------------------------------------
# Capability
# ---------------------------------------------------------------------------


class RagSearchCapability(CapabilityBase):
    """Hybrid search capability over local documents (statutes/manuals/cases/notices).

    Receives the existing _rag_search_tool closure from api_server by
    injection, wraps it in the CapabilityBase interface, and applies
    parameter validation, timeouts, result normalization, and the
    fallback policy.

    Parameters
    ----------
    execute_fn : Callable
        Execution function with an ``async (query, context, session) -> dict`` signature.
    low_confidence_threshold : float
        Low-confidence threshold. If every result scores below it, the
        result set is classified as ``low_confidence``.
    """

    def __init__(
        self,
        execute_fn: Callable[..., Any],
        low_confidence_threshold: float = LOW_CONFIDENCE_THRESHOLD,
    ) -> None:
        self._execute_fn = execute_fn
        self._low_confidence_threshold = low_confidence_threshold

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="rag_search",
            description=(
                "Searches local documents such as internal statutes, manuals, "
                "complaint cases, and notices via hybrid search (BM25 + vector)."
            ),
            approval_summary="Searches the local document DB for relevant statutes/cases.",
            provider="local_vectordb",
            timeout_sec=get_timeout("rag_search"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Run the search - apply parameter validation, timeout, normalization, and fallback."""
        provider = self.metadata.provider

        params = RagSearchParams.from_context(
            query, context, default_min_confidence=self._low_confidence_threshold
        )
        validation_error = params.validate()
        if validation_error:
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=validation_error,
                empty_reason="validation_error",
            )

        try:
            raw = await asyncio.wait_for(
                self._execute_fn(query=params.query, context=context, session=session),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            logger.warning(f"[rag_search] timeout (exceeded {self.metadata.timeout_sec}s)")
            return LookupResult(
                success=False,
                query=params.query,
                provider=provider,
                error=f"search timed out (exceeded {self.metadata.timeout_sec}s)",
                empty_reason="provider_error",
            )
        except Exception as exc:
            logger.error(f"[rag_search] search error: {exc}", exc_info=True)
            return LookupResult(
                success=False,
                query=params.query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
            )

        if not isinstance(raw, dict):
            raw = {}

        if raw.get("error"):
            return LookupResult(
                success=False,
                query=raw.get("query", params.query),
                provider=provider,
                error=raw["error"],
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(
                    status="error",
                    errors=[raw["error"]],
                ),
            )

        raw_query = raw.get("query", params.query)
        raw_context_text = raw.get("context_text", "")
        raw_results = raw.get("results", [])
        normalized = [_normalize_result(r) for r in raw_results]

        # Normalize into EvidenceItems
        evidence_items = []
        for r in normalized:
            evidence_items.append(
                EvidenceItem(
                    source_type="rag",
                    title=r.get("title", ""),
                    excerpt=r.get("excerpt", "")[:500],
                    link_or_path=r.get("file_path", ""),
                    page=r.get("page"),
                    score=float(r.get("score", 0.0)),
                    provider_meta={"provider": provider},
                )
            )

        if not normalized:
            return LookupResult(
                success=True,
                query=raw_query,
                provider=provider,
                empty_reason="no_match",
                context_text=raw_context_text,
                evidence=EvidenceEnvelope(items=[], status="empty"),
            )

        confident = [r for r in normalized if r["score"] >= params.min_confidence]
        if not confident:
            logger.info(f"[rag_search] all results are low-confidence (threshold={params.min_confidence})")
            return LookupResult(
                success=True,
                query=raw_query,
                results=normalized,
                context_text=raw_context_text,
                provider=provider,
                empty_reason="low_confidence",
                evidence=EvidenceEnvelope(
                    items=evidence_items,
                    status="partial",
                ),
            )

        confident_evidence = [ei for ei in evidence_items if ei.score >= params.min_confidence]
        citations = [
            {
                "source_type": r["source_type"],
                "doc_id": r["doc_id"],
                "title": r["title"],
                "score": r["score"],
                "excerpt": r["excerpt"][:200],
            }
            for r in confident
        ]

        return LookupResult(
            success=True,
            query=raw_query,
            results=confident,
            context_text=raw_context_text,
            citations=citations,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=confident_evidence,
                status="ok",
            ),
        )
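Assuming the package is importable, a short sketch of how RagSearchParams.from_context resolves aliases and clamps values; the context dict is hypothetical:

from src.inference.graph.capabilities.rag_search import RagSearchParams

params = RagSearchParams.from_context(
    "  parking fine appeal  ",
    {
        "count": 200,                  # alias of top_k; clamped into [1, 50]
        "score_threshold": 1.7,        # alias of min_confidence; clamped into [0.0, 1.0]
        "filters": ["LAW", "Manual"],  # lower-cased before validation
    },
)
print(params.query)           # "parking fine appeal" (stripped)
print(params.top_k)           # 50
print(params.min_confidence)  # 1.0
print(params.validate())      # None only if "law"/"manual" are actual IndexType values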
src/inference/graph/capabilities/registry.py
ADDED
@@ -0,0 +1,125 @@
"""Tool metadata registry - the single source of truth for MVP capabilities.

Issue #416: tool metadata registry and LangGraph executor binding cleanup.

This module guarantees that:
- the metadata the planner reads and the executor binding come from the same source
- approval prompts and session logs use identical capability identifiers
- non-MVP capabilities are blocked at the registry level
"""

from __future__ import annotations

from typing import Any, Callable, Dict, List

from src.inference.tool_router import ToolType

from .api_lookup import ApiLookupCapability
from .append_evidence import AppendEvidenceCapability
from .base import CapabilityBase, CapabilityMetadata
from .demographics_lookup import DemographicsLookupCapability
from .draft_civil_response import DraftCivilResponseCapability
from .issue_detector import IssueDetectorCapability
from .keyword_analyzer import KeywordAnalyzerCapability
from .rag_search import RagSearchCapability
from .stats_lookup import StatsLookupCapability

# ---------------------------------------------------------------------------
# MVP capability stable identifiers (used in session logs and approval prompts)
# Derived from the ToolType enum to preserve a single source of truth.
# ---------------------------------------------------------------------------

MVP_CAPABILITY_IDS: frozenset[str] = frozenset(t.value for t in ToolType)


def get_mvp_capability_ids() -> frozenset[str]:
    """Return the set of MVP capability stable identifiers.

    Used as a whitelist by plan_validator, planner_adapter, and similar modules.
    """
    return MVP_CAPABILITY_IDS


def build_mvp_registry(
    *,
    rag_search_fn: Callable[..., Any],
    api_lookup_action: Any = None,
    draft_civil_response_fn: Callable[..., Any],
    append_evidence_fn: Callable[..., Any],
    rag_low_confidence_threshold: float = 0.3,
) -> Dict[str, CapabilityBase]:
    """Return a registry of the MVP capabilities as CapabilityBase instances.

    Because every capability implements CapabilityBase,
    RegistryExecutorAdapter.get_tool_metadata() returns consistent metadata.

    Parameters
    ----------
    rag_search_fn : Callable
        RAG search function of the form ``async (query, context, session) -> dict``.
    api_lookup_action : Any, optional
        ``MinwonAnalysisAction`` instance. If None, empty results are returned.
    draft_civil_response_fn : Callable
        Civil-complaint drafting function of the form ``async (query, context, session) -> dict``.
    append_evidence_fn : Callable
        Evidence-supplement function of the form ``async (query, context, session) -> dict``.
    rag_low_confidence_threshold : float
        Low-confidence threshold for RAG search. Defaults to 0.3.

    Returns
    -------
    Dict[str, CapabilityBase]
        Mapping from capability name to CapabilityBase instance.
    """
    return {
        "rag_search": RagSearchCapability(
            execute_fn=rag_search_fn,
            low_confidence_threshold=rag_low_confidence_threshold,
        ),
        "api_lookup": ApiLookupCapability(action=api_lookup_action),
        "draft_civil_response": DraftCivilResponseCapability(
            execute_fn=draft_civil_response_fn,
        ),
        "append_evidence": AppendEvidenceCapability(execute_fn=append_evidence_fn),
        "issue_detector": IssueDetectorCapability(action=api_lookup_action),
        "stats_lookup": StatsLookupCapability(action=api_lookup_action),
        "keyword_analyzer": KeywordAnalyzerCapability(action=api_lookup_action),
        "demographics_lookup": DemographicsLookupCapability(action=api_lookup_action),
    }


def get_all_metadata(
    registry: Dict[str, CapabilityBase],
) -> List[Dict[str, Any]]:
    """Return the metadata of every capability in the registry as a list of dicts.

    Used by the planner when it builds the tool list.

    Parameters
    ----------
    registry : Dict[str, CapabilityBase]
        Registry returned by build_mvp_registry().

    Returns
    -------
    List[Dict[str, Any]]
        List of each capability's metadata dict.
    """
    result: List[Dict[str, Any]] = []
    for cap in registry.values():
        meta = cap.metadata
        result.append(
            {
                "name": meta.name,
                "description": meta.description,
                "approval_summary": meta.approval_summary,
                "provider": meta.provider,
                "timeout_sec": meta.timeout_sec,
            }
        )
    return result


def is_mvp_capability(name: str) -> bool:
    """Check whether the given name is an MVP capability."""
    return name in MVP_CAPABILITY_IDS
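A sketch of wiring the registry at startup. The three async stubs are hypothetical placeholders for the real api_server closures, and passing action=None keeps the data.go.kr capabilities in their empty-result mode; whether a given name counts as MVP depends on the ToolType enum:

from src.inference.graph.capabilities.registry import (
    build_mvp_registry,
    get_all_metadata,
    is_mvp_capability,
)


async def _stub(query, context, session) -> dict:  # hypothetical closure stand-in
    return {"text": "", "results": []}


registry = build_mvp_registry(
    rag_search_fn=_stub,
    api_lookup_action=None,  # analysis capabilities degrade to empty results
    draft_civil_response_fn=_stub,
    append_evidence_fn=_stub,
)

for meta in get_all_metadata(registry):
    print(meta["name"], meta["provider"], meta["timeout_sec"])

print(is_mvp_capability("rag_search"))  # True if "rag_search" is a ToolType value
print(is_mvp_capability("shell_exec"))  # False - blocked at the registry level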
src/inference/graph/capabilities/stats_lookup.py
ADDED
@@ -0,0 +1,287 @@
"""stats_lookup capability - combines the custom-statistics, trend, count, org-ranking, and region-ranking APIs.

Issue #487: civil-complaint statistics lookup tool.
Combines five APIs to report the current state of complaint statistics.
"""

from __future__ import annotations

import asyncio
from typing import Any, Dict, List, Optional

from loguru import logger

from .base import (
    CapabilityBase,
    CapabilityMetadata,
    EvidenceEnvelope,
    EvidenceItem,
    LookupResult,
)
from .defaults import get_timeout


class StatsLookupCapability(CapabilityBase):
    """Civil-complaint statistics lookup capability.

    With a search keyword it combines count + trend; without one it combines
    statistics + org ranking + region ranking.

    Parameters
    ----------
    action : Optional[MinwonAnalysisAction]
        Action instance used for the API calls. If None, an empty result is returned.
    """

    def __init__(self, action: Optional[Any] = None) -> None:
        self._action = action

    @property
    def metadata(self) -> CapabilityMetadata:
        return CapabilityMetadata(
            name="stats_lookup",
            description=(
                "Combines complaint statistics, trends, counts, and "
                "organization/region rankings into complaint-status statistics."
            ),
            approval_summary="Looks up complaint statistics from the public data portal.",
            provider="data.go.kr",
            timeout_sec=get_timeout("stats_lookup"),
        )

    async def execute(
        self,
        query: str,
        context: Dict[str, Any],
        session: Any,
    ) -> LookupResult:
        """Call the API combination selected by the inputs and return the result."""
        provider = self.metadata.provider

        if not query or not query.strip():
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error="query is empty",
                empty_reason="validation_error",
                evidence=EvidenceEnvelope(status="error", errors=["query is empty"]),
            )

        if self._action is None:
            logger.debug("[stats_lookup] action is None - returning empty result")
            return LookupResult(
                success=True,
                query=query,
                provider=provider,
                empty_reason="no_match",
                evidence=EvidenceEnvelope(status="empty"),
            )

        date_from = context.get("date_from", "")
        date_to = context.get("date_to", "")
        searchword = context.get("searchword", "")
        period = context.get("period", "DAILY")
        top_n = int(context.get("top_n", 5))

        try:
            results_map = await asyncio.wait_for(
                self._fetch_all(date_from, date_to, searchword, period, top_n),
                timeout=self.metadata.timeout_sec,
            )
        except asyncio.TimeoutError:
            msg = f"API call timed out (exceeded {self.metadata.timeout_sec}s)"
            logger.warning(f"[stats_lookup] {msg}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=msg,
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[msg]),
            )
        except Exception as exc:
            logger.opt(exception=exc).error(f"[stats_lookup] API call failed: {exc}")
            return LookupResult(
                success=False,
                query=query,
                provider=provider,
                error=str(exc),
                empty_reason="provider_error",
                evidence=EvidenceEnvelope(status="error", errors=[str(exc)]),
            )

        all_results: List[Dict[str, Any]] = []
        evidence_items: List[EvidenceItem] = []
        errors: List[str] = []

        for api_name, items in results_map.items():
            if items is None:
                errors.append(f"{api_name} API failed")
                continue
            for item in items:
                item["_source_api"] = api_name
                all_results.append(item)
                evidence_items.append(
                    EvidenceItem(
                        source_type="api",
                        title=item.get("label", item.get("term", "")),
                        excerpt=self._format_item(api_name, item),
                        provider_meta={"provider": provider, "api": api_name},
                    )
                )

        if not all_results:
            status = "error" if len(errors) == len(results_map) else "empty"
            return LookupResult(
                success=len(errors) < len(results_map),
                query=query,
                provider=provider,
                empty_reason="no_match" if len(errors) < len(results_map) else "provider_error",
                error="; ".join(errors) if errors else None,
                evidence=EvidenceEnvelope(items=[], status=status, errors=errors),
            )

        context_text = self._build_context_text(results_map, date_from, date_to)
        status = "ok" if not errors else "partial"

        return LookupResult(
            success=True,
            query=query,
            results=all_results,
            context_text=context_text,
            provider=provider,
            evidence=EvidenceEnvelope(
                items=evidence_items,
                summary_text=context_text,
                status=status,
                errors=errors,
            ),
        )

    async def _fetch_all(
        self,
        date_from: str,
        date_to: str,
        searchword: str,
        period: str,
        top_n: int,
    ) -> Dict[str, Optional[List]]:
        """Call the APIs appropriate for the inputs in parallel."""
        tasks: Dict[str, Any] = {}

        if searchword:
            # Keyword-based: count + trend
            tasks["doc_count"] = self._safe_call(
                self._action.get_doc_count,
                date_from=date_from,
                date_to=date_to,
                searchword=searchword,
            )
            if date_from and date_to:
                tasks["trend"] = self._safe_call(
                    self._action.get_trend,
                    date_from=date_from + "00",
                    date_to=date_to + "23",
                    period=period,
                )
        else:
            # General statistics: statistics + org ranking + region ranking
            if date_from and date_to:
                tasks["statistics"] = self._safe_call(
                    self._action.get_statistics,
                    date_from=date_from,
                    date_to=date_to,
                    period=period,
                )
            tasks["org_ranking"] = self._safe_call(
                self._action.get_org_ranking,
                date_from=date_from,
                date_to=date_to,
                top_n=top_n,
            )
            tasks["region_ranking"] = self._safe_call(
                self._action.get_region_ranking,
                date_from=date_from,
                date_to=date_to,
                top_n=top_n,
            )

        if not tasks:
            return {}

        keys = list(tasks.keys())
        values = await asyncio.gather(*tasks.values())
        return dict(zip(keys, values))

    @staticmethod
    async def _safe_call(fn, **kwargs) -> Optional[List[Dict[str, Any]]]:
        """Wrap an individual API call so a failure returns None instead of raising."""
        try:
            return await fn(**kwargs)
        except Exception as exc:
            logger.warning(f"[stats_lookup] individual API call failed: {exc}")
            return None

    @staticmethod
    def _format_item(api_name: str, item: Dict[str, Any]) -> str:
        """Build a one-line summary string for an individual item."""
        label = item.get("label", "")
        hits = item.get("hits", "")
        if api_name == "doc_count":
            pttn = item.get("pttn", 0)
            dfpt = item.get("dfpt", 0)
            saeol = item.get("saeol", 0)
            return f"e-People={pttn}, Minwon24={dfpt}, Saeol={saeol}"
        if api_name == "trend":
            ratio = item.get("prebRatio", "")
            return f"{label}: {hits} cases, {ratio}% vs. previous day"
        return f"{label}: {hits} cases"

    @staticmethod
    def _build_context_text(
        results_map: Dict[str, Optional[List]],
        date_from: str,
        date_to: str,
    ) -> str:
        """Build a natural-language summary from the combined results."""
        parts: List[str] = []
        period_str = ""
        if date_from and date_to:
            period_str = (
                f"{date_from[:4]}/{date_from[4:6]}/{date_from[6:8]}~{date_to[4:6]}/{date_to[6:8]}"
            )

        doc_count = results_map.get("doc_count")
        if doc_count and len(doc_count) > 0:
            item = doc_count[0]
            try:
                pttn = int(item.get("pttn") or 0)
                dfpt = int(item.get("dfpt") or 0)
                saeol = int(item.get("saeol") or 0)
            except (ValueError, TypeError):
                pttn, dfpt, saeol = 0, 0, 0
            total = pttn + dfpt + saeol
            parts.append(f"{period_str} total {total:,} cases" if period_str else f"total {total:,} cases")

        stats = results_map.get("statistics")
        if stats:
            total = sum(int(s.get("hits", 0)) for s in stats)
            parts.append(f"{period_str} total {total:,} cases" if period_str else f"total {total:,} cases")

        trend = results_map.get("trend")
        if trend and len(trend) > 0:
            last = trend[-1]
            ratio = last.get("prebRatio", "")
            if ratio:
                parts.append(f"{'+' if not ratio.startswith('-') else ''}{ratio}% vs. previous day")

        region = results_map.get("region_ranking")
        if region and len(region) > 0:
            top = region[0]
            parts.append(f"{top.get('label', '')} highest ({int(top.get('hits', 0)):,} cases)")

        org = results_map.get("org_ranking")
        if org and len(org) > 0:
            top = org[0]
            parts.append(f"top organization: {top.get('label', '')} ({int(top.get('hits', 0)):,} cases)")

        return ", ".join(parts) if parts else ""
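The context keys consumed by execute() (date_from, date_to, searchword, period, top_n) are plain dict entries, so the capability can be exercised without a live data.go.kr action. A minimal sketch of the action-less path, which per the code above short-circuits to an empty LookupResult; attribute access on the result assumes LookupResult from .base is the usual attribute-style record:

import asyncio

cap = StatsLookupCapability(action=None)

result = asyncio.run(
    cap.execute(
        query="illegal parking complaint statistics",
        context={"date_from": "20250101", "date_to": "20250131", "period": "DAILY", "top_n": 5},
        session=None,
    )
)
print(result.success, result.empty_reason)  # expected: True no_match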
src/inference/graph/executor_adapter.py
ADDED
@@ -0,0 +1,190 @@
"""Executor adapter: looks up tools in the tool registry and executes them.

Issue #415: LangGraph runtime foundation and planner/executor adapter setup.
Issue #416: tool metadata registry and LangGraph executor binding cleanup.

Two implementations are provided:
- `ExecutorAdapter` (ABC): the abstract interface
- `RegistryExecutorAdapter`: an implementation backed by a CapabilityBase registry
"""

from __future__ import annotations

import asyncio
import time
from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional

from loguru import logger


class ExecutorAdapter(ABC):
    """Abstract tool-executor interface.

    Called from the `tool_execute` node of the LangGraph graph.
    """

    @abstractmethod
    async def execute(
        self,
        tool_name: str,
        query: str,
        context: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Execute a tool and return its result.

        Parameters
        ----------
        tool_name : str
            Name of the tool to execute.
        query : str
            User request text.
        context : Dict[str, Any]
            Accumulated context (includes previous tool results).

        Returns
        -------
        Dict[str, Any]
            Tool execution result; at minimum {"success": bool, ...}.
            On failure, {"success": False, "error": str}.
        """
        ...

    @abstractmethod
    def list_tools(self) -> list[str]:
        """Return the list of registered tool names."""
        ...


class RegistryExecutorAdapter(ExecutorAdapter):
    """Executor that reuses the existing tool_registry.

    `tool_registry` is injected as a `Dict[str, Callable]`.
    Each callable must have an `async (query, context, session) -> dict` signature.
    Inherits the logic of the legacy `AgentLoop._execute_tool()`.

    Parameters
    ----------
    tool_registry : Dict[str, Callable]
        Mapping of tool name -> async callable.
    session_store : SessionStore
        GovOn session store. The executor injects a session into each tool call.
    default_timeout : float
        Per-tool execution time limit in seconds. Defaults to 30.0.
    """

    def __init__(
        self,
        tool_registry: Dict[str, Callable],
        session_store: Any,  # SessionStore (typed as Any to avoid a circular import)
        default_timeout: float = 30.0,
    ) -> None:
        self._tools = tool_registry
        self._session_store = session_store
        self._default_timeout = default_timeout

    async def execute(
        self,
        tool_name: str,
        query: str,
        context: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Look up a tool and execute it with a timeout.

        Tools not registered as MVP capabilities are blocked.
        """
        from src.inference.graph.capabilities.registry import is_mvp_capability

        # Block non-MVP capabilities
        if not is_mvp_capability(tool_name):
            logger.warning(f"[RegistryExecutorAdapter] blocked non-MVP capability: {tool_name}")
            return {"success": False, "error": f"non-MVP capability: {tool_name}"}

        tool_fn = self._tools.get(tool_name)
        if tool_fn is None:
            return {"success": False, "error": f"unregistered tool: {tool_name}"}

        session = self._session_store.get_or_create(context.get("session_id"))
        start = time.monotonic()
        try:
            result = await asyncio.wait_for(
                tool_fn(query=query, context=context, session=session),
                timeout=self._default_timeout,
            )
            latency = (time.monotonic() - start) * 1000
            if isinstance(result, dict):
                if "latency_ms" not in result:
                    result["latency_ms"] = latency
                result.setdefault("success", True)
                return result
            return {"success": True, "result": result, "latency_ms": latency}
        except asyncio.TimeoutError:
            latency = (time.monotonic() - start) * 1000
            return {
                "success": False,
                "error": f"tool {tool_name} timed out ({self._default_timeout}s)",
                "latency_ms": latency,
            }
        except Exception as exc:
            latency = (time.monotonic() - start) * 1000
            logger.opt(exception=exc).error(f"[RegistryExecutorAdapter] tool {tool_name} failed: {exc}")
            return {"success": False, "error": str(exc), "latency_ms": latency}

    def list_tools(self) -> list[str]:
        """Return the list of registered tool names."""
        return list(self._tools.keys())

    def get_tool_metadata(self, tool_name: str) -> Optional[dict]:
        """Return a tool's planner metadata.

        If a CapabilityBase instance is registered, the information is taken
        from its metadata property; for a plain callable, a default dict
        containing only the name is returned. Returns None for an
        unregistered tool.

        Parameters
        ----------
        tool_name : str
            Name of the tool to look up.

        Returns
        -------
        Optional[dict]
            Tool metadata dict, or None.
        """
        tool = self._tools.get(tool_name)
        if tool is None:
            return None
        # CapabilityBase interface support
        if hasattr(tool, "metadata"):
            meta = tool.metadata
            return {
                "name": meta.name,
                "description": meta.description,
                "approval_summary": meta.approval_summary,
                "provider": getattr(meta, "provider", ""),
            }
        return {
            "name": tool_name,
            "description": "",
            "approval_summary": "",
            "provider": "",
        }

    def get_tool_descriptions_for_planner(self) -> List[dict]:
        """Expose the tool list for the planner through a single method.

        Returns the metadata of every registered tool as a list of dicts.
        CapabilityBase instances yield rich metadata, while plain callables
        yield a default dict containing only the name.

        Returns
        -------
        List[dict]
            One metadata dict per tool.
        """
        descriptions: List[dict] = []
        for name in self._tools:
            meta = self.get_tool_metadata(name)
            if meta is not None:
                descriptions.append(meta)
        return descriptions
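A quick wiring sketch for RegistryExecutorAdapter. The in-memory _StubSessionStore and echo_tool below are hypothetical test doubles (the real SessionStore lives in src.inference.session_context), and the stub is registered under "rag_search" because execute() only admits names that pass is_mvp_capability:

import asyncio
from typing import Any, Dict


class _StubSessionStore:
    # Hypothetical stand-in exposing only the get_or_create() the adapter calls.
    def get_or_create(self, session_id: Any) -> dict:
        return {"session_id": session_id or "new-session"}


async def echo_tool(query: str, context: Dict[str, Any], session: Any) -> dict:
    return {"success": True, "text": f"echo: {query}"}


adapter = RegistryExecutorAdapter(
    tool_registry={"rag_search": echo_tool},
    session_store=_StubSessionStore(),
    default_timeout=5.0,
)

result = asyncio.run(adapter.execute(tool_name="rag_search", query="hello", context={}))
print(result["text"], result["latency_ms"])  # echo: hello <elapsed ms>
print(adapter.list_tools())                  # ['rag_search']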
src/inference/graph/nodes.py
ADDED
@@ -0,0 +1,752 @@
"""GovOn LangGraph node functions.

Issue #415: LangGraph runtime foundation and planner/executor adapter setup.

Defines six nodes:
session_load -> planner -> approval_wait -> tool_execute -> synthesis -> persist

Each node takes a `GovOnGraphState` as input and returns a state-update dict.
Nodes that perform I/O are async functions; the `approval_wait` node is a
human-in-the-loop approval gate using `interrupt()`, so it stays a sync function.
"""

from __future__ import annotations

import asyncio
import time
from typing import TYPE_CHECKING, Any, Dict, List

from langchain_core.messages import AIMessage
from langgraph.types import interrupt
from loguru import logger

from src.inference.query_builder import (
    build_query_variants,
    build_runtime_query_context,
    resolve_tool_query,
)

from .plan_validator import PlanValidationError, ToolPlanValidator
from .state import ApprovalStatus, GovOnGraphState

if TYPE_CHECKING:
    from src.inference.session_context import SessionStore

    from .executor_adapter import ExecutorAdapter
    from .planner_adapter import PlannerAdapter


async def session_load_node(
    state: GovOnGraphState,
    *,
    session_store: "SessionStore",
) -> dict:
    """Session-load node.

    Loads an existing session from the SessionStore or creates a new one.
    Also injects the tool-usage records from the session history into
    accumulated_context.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads `session_id` and `messages`.
    session_store : SessionStore
        Session store injected as a closure from the graph config.

    Returns
    -------
    dict
        Updates `session_id` and `accumulated_context`.
    """
    _start = time.monotonic()

    session_id: str | None = state.get("session_id")
    session = session_store.get_or_create(session_id)

    messages = state.get("messages", [])
    query = messages[-1].content if messages else ""
    accumulated_context = build_runtime_query_context(session, query)

    _latency_ms = round((time.monotonic() - _start) * 1000, 2)
    logger.debug(
        f"[session_load] session_id={session.session_id} "
        f"query_len={len(query)} latency_ms={_latency_ms}"
    )

    return {
        "session_id": session.session_id,
        "accumulated_context": accumulated_context,
        "node_latencies": {"session_load": _latency_ms},
    }


async def planner_node(
    state: GovOnGraphState,
    *,
    planner_adapter: "PlannerAdapter",
) -> dict:
    """Planner node.

    Calls the PlannerAdapter to produce a structured execution plan.
    LLMPlannerAdapter is used by default; with SKIP_MODEL_LOAD=true,
    RegexPlannerAdapter acts as the CI fallback.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads `messages` and `accumulated_context`.
    planner_adapter : PlannerAdapter
        Planner adapter injected as a closure from the graph config.

    Returns
    -------
    dict
        Updates `task_type`, `goal`, `reason`, and `planned_tools`.
    """
    _start = time.monotonic()

    messages = state.get("messages", [])
    context = state.get("accumulated_context", {})

    plan = await planner_adapter.plan(messages=messages, context=context)

    validator = ToolPlanValidator()
    try:
        validator.validate(plan)
    except PlanValidationError as e:
        _latency_ms = round((time.monotonic() - _start) * 1000, 2)
        logger.warning(f"[planner] validation failed: {e} latency_ms={_latency_ms}")
        return {
            **validator.make_fallback_plan(e),
            "task_type": "",
            "node_latencies": {"planner": _latency_ms},
        }

    logger.info(
        f"[planner] task_type={plan.task_type.value} "
        f"tools={plan.tools} reason={plan.reason} adapter_mode={plan.adapter_mode}"
    )

    query_variants = build_query_variants(
        context.get("query", ""),
        tool_names=plan.tools,
        context=context,
    )

    _latency_ms = round((time.monotonic() - _start) * 1000, 2)
    logger.debug(f"[planner] latency_ms={_latency_ms}")

    return {
        "task_type": plan.task_type.value,
        "goal": plan.goal,
        "reason": plan.reason,
        "planned_tools": plan.tools,
        "tool_summaries": plan.tool_summaries,
        "adapter_mode": plan.adapter_mode,
        "accumulated_context": {
            **context,
            "query_variants": query_variants,
        },
        "node_latencies": {"planner": _latency_ms},
    }


def approval_wait_node(state: GovOnGraphState) -> dict:
    """Human-in-the-loop approval gate.

    Calls `interrupt()` to pause graph execution.
    The FastAPI `/v2/agent/approve` endpoint receives the user's answer and
    resumes the graph.

    `interrupt()` is the human-in-the-loop mechanism supported by LangGraph:
    graph execution stops, and when resumed via `Command(resume=...)`,
    the user input is delivered as the return value of `interrupt()`.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads `goal`, `reason`, and `planned_tools`.

    Returns
    -------
    dict
        Updates `approval_status`.
    """
    tool_summaries: List[str] = state.get("tool_summaries") or []
    planned_tools: List[str] = state.get("planned_tools", [])

    # Tool descriptions for CLI display: use tool_summaries when available,
    # otherwise fall back to the raw tool names.
    display_tools = tool_summaries if tool_summaries else planned_tools

    approval_request = {
        "type": "approval_request",
        "goal": state.get("goal", ""),
        "reason": state.get("reason", ""),
        "planned_tools": planned_tools,
        "tool_summaries": display_tools,
        "prompt": (
            f"I am about to perform the following task:\n\n"
            f"  {state.get('goal', '')}\n\n"
            f"  Reason: {state.get('reason', '')}\n"
            f"  Tools to use:\n"
            + "".join(f"  - {s}\n" for s in display_tools)
            + "\nDo you approve? (approve/reject)"
        ),
    }

    logger.info(f"[approval_wait] calling interrupt: tools={planned_tools}")

    # interrupt() stops graph execution; its return value arrives on resume.
    # Example: {"approved": True} or {"approved": False}
    user_response = interrupt(approval_request)

    if isinstance(user_response, dict) and user_response.get("approved"):
        logger.info("[approval_wait] approved")
        return {"approval_status": ApprovalStatus.APPROVED.value}

    # If a cancel signal is present, set interrupt_reason to "user_cancel"
    interrupt_reason = None
    if isinstance(user_response, dict) and user_response.get("cancel"):
        logger.info("[approval_wait] user cancelled")
        interrupt_reason = "user_cancel"
    else:
        logger.info("[approval_wait] rejected")

    return {
        "approval_status": ApprovalStatus.REJECTED.value,
        "interrupt_reason": interrupt_reason,
    }


async def tool_execute_node(
    state: GovOnGraphState,
    *,
    executor_adapter: "ExecutorAdapter",
) -> dict:
    """Tool-executor node.

    Executes `planned_tools` in two phases via the ExecutorAdapter and
    accumulates the results into `accumulated_context`.

    Execution strategy:
    - Phase 1 (parallel): INDEPENDENT_TOOLS such as `rag_search` and
      `api_lookup` run concurrently via `asyncio.gather()`.
    - Phase 2 (sequential): the remaining dependent tools
      (draft_civil_response, etc.) run in order, using the
      accumulated_context enriched by the Phase 1 results.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads `planned_tools` and `accumulated_context`.
    executor_adapter : ExecutorAdapter
        Executor adapter injected as a closure from the graph config.

    Returns
    -------
    dict
        Updates `tool_results`, `accumulated_context`, and `node_latencies`.
        `node_latencies` holds the whole-node latency under the
        `"tool_execute"` key and per-tool latencies under `"tool:<tool_name>"` keys.
    """
    _start = time.monotonic()

    # Approval guard: block tool execution without approval
    approval_status = state.get("approval_status", "")
    if approval_status != ApprovalStatus.APPROVED.value:
        _latency_ms = round((time.monotonic() - _start) * 1000, 2)
        logger.warning(
            f"[tool_execute] blocked execution attempt without approval: approval_status={approval_status!r}"
        )
        return {
            "tool_results": {},
            "accumulated_context": dict(state.get("accumulated_context", {})),
            "error": f"tool execution blocked: approval required (current status: {approval_status!r})",
            "node_latencies": {"tool_execute": _latency_ms},
        }

    planned_tools: list[str] = state.get("planned_tools", [])
    accumulated: Dict[str, Any] = dict(state.get("accumulated_context", {}))

    # planned_tools may be empty (e.g. the validation-failure fallback)
    if not planned_tools:
        logger.warning("[tool_execute] planned_tools is empty, skipping execution")
        return {
            "tool_results": {},
            "accumulated_context": accumulated,
            "node_latencies": {"tool_execute": round((time.monotonic() - _start) * 1000, 2)},
        }

    tool_results: Dict[str, Any] = {}
    tool_latencies: Dict[str, float] = {}

    # --- Split independent and dependent tools for parallel/sequential execution ---
    # New independent capabilities must be registered in this set as well.
    # An independent tool is one that does not depend on other tools' results
    # (accumulated_context) and is therefore safe to run in parallel.
    INDEPENDENT_TOOLS = {"rag_search", "api_lookup"}

    independent = [t for t in planned_tools if t in INDEPENDENT_TOOLS]
    dependent = [t for t in planned_tools if t not in INDEPENDENT_TOOLS]

    # Phase 1: run independent tools in parallel.
    # Note: inside the _run_tool closure, accumulated must be treated as read-only.
    # Mutating accumulated during parallel execution could cause a race condition.
    # Updates to accumulated happen in order in the main loop after gather() completes.
    if independent:

        async def _run_tool(name: str) -> tuple[str, Dict[str, Any], float]:
            t0 = time.monotonic()
            execution_query = resolve_tool_query(name, accumulated)
            logger.info(f"[tool_execute] parallel run: {name}")
            result = await executor_adapter.execute(
                tool_name=name,
                query=execution_query,
                context=dict(accumulated),
            )
            latency = round((time.monotonic() - t0) * 1000, 2)
            return name, result, latency

        results = await asyncio.gather(
            *[_run_tool(name) for name in independent],
            return_exceptions=True,
        )
        for i, item in enumerate(results):
            if isinstance(item, Exception):
                failed_tool = independent[i]
                logger.opt(exception=item).error(
                    f"[tool_execute] parallel run failed: tool={failed_tool}"
                )
                continue
            name, result, latency = item
            tool_results[name] = result
            tool_latencies[name] = latency
            if result.get("success", True):
                accumulated[name] = result

    # Phase 2: run dependent tools sequentially (they need the accumulated context)
    for name in dependent:
        t0 = time.monotonic()
        execution_query = resolve_tool_query(name, accumulated)
        logger.info(f"[tool_execute] sequential run: {name}")
        try:
            result = await executor_adapter.execute(
                tool_name=name,
                query=execution_query,
                context=accumulated,
            )
        except Exception as exc:
            latency = round((time.monotonic() - t0) * 1000, 2)
            logger.opt(exception=exc).error(f"[tool_execute] sequential run failed: tool={name}")
            tool_results[name] = {
                "success": False,
                "error": f"{type(exc).__name__}: {exc}",
            }
            tool_latencies[name] = latency
            continue
        latency = round((time.monotonic() - t0) * 1000, 2)
        tool_results[name] = result
        tool_latencies[name] = latency
        if result.get("success", True):
            accumulated[name] = result

    _latency_ms = round((time.monotonic() - _start) * 1000, 2)
    logger.info(
        f"[tool_execute] done: {list(tool_results.keys())} "
        f"latency_ms={_latency_ms} per_tool={tool_latencies}"
    )

    # Record both the whole-node latency and the per-tool latencies in node_latencies.
    # Per-tool latencies are distinguished by the "tool:<tool_name>" prefix.
    merged_latencies: Dict[str, float] = {"tool_execute": _latency_ms}
    for tool_name, tool_lat in tool_latencies.items():
        merged_latencies[f"tool:{tool_name}"] = tool_lat

    return {
        "tool_results": tool_results,
        "accumulated_context": accumulated,
        "node_latencies": merged_latencies,
    }


async def synthesis_node(state: GovOnGraphState) -> dict:
    """Result-synthesis node.

    Combines tool_results and accumulated_context into the final response text.
    Inherits the logic of the legacy AgentLoop._extract_final_text().

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads `tool_results`, `accumulated_context`, and `task_type`.

    Returns
    -------
    dict
        Updates `final_text`, `evidence_items`, and `messages` (appends an AIMessage).
    """
    _start = time.monotonic()

    accumulated = state.get("accumulated_context", {})
    task_type = state.get("task_type", "")

    final_text = _extract_final_text(accumulated, task_type)
    evidence_items = _collect_evidence_items(accumulated)

    _latency_ms = round((time.monotonic() - _start) * 1000, 2)
    logger.info(
        f"[synthesis] final_text_len={len(final_text)} evidence_items={len(evidence_items)} latency_ms={_latency_ms}"
    )

    return {
        "final_text": final_text,
        "evidence_items": evidence_items,
        "messages": [AIMessage(content=final_text)],
        "node_latencies": {"synthesis": _latency_ms},
    }


async def persist_node(
    state: GovOnGraphState,
    *,
    session_store: "SessionStore",
) -> dict:
    """Persistence node.

    Saves the conversation turn and tool-run records to the SessionStore.
    Inherits the legacy SessionContext.add_turn / add_tool_run logic.

    Parameters
    ----------
    state : GovOnGraphState
        Current graph state. Reads the whole state for persistence.
    session_store : SessionStore
        Session store injected as a closure from the graph config.

    Returns
    -------
    dict
        Persists to the DB as a side effect and returns a `node_latencies` update.
    """
    _start = time.monotonic()

    session_id: str | None = state.get("session_id")
    session = session_store.get_or_create(session_id)

    # Save the user input (messages[0] is the first user message)
    messages = state.get("messages", [])
    if messages:
        user_msg = messages[0]
        session.add_turn("user", user_msg.content)

    # --- graph_run record (plan + approval + executed capabilities) ---
    request_id: str = state.get("request_id", "")
    approval_status: str = state.get("approval_status", "")
    planned_tools: List[str] = state.get("planned_tools", [])
    tool_results: Dict[str, Any] = state.get("tool_results", {})

    # Record the executed tools only when approved; empty list on rejection
    executed_capabilities: List[str] = (
        [name for name in planned_tools if name in tool_results]
        if approval_status == ApprovalStatus.APPROVED.value
        else []
    )

    plan_summary = (
        f"[{state.get('task_type', '')}] {state.get('goal', '')} "
        f"| reason: {state.get('reason', '')} | tools: {planned_tools}"
    )

    total_latency_ms = sum(r.get("latency_ms", 0.0) for r in tool_results.values())

    # "interrupted" if interrupt_reason is set, "rejected" on rejection, otherwise "completed"
    interrupt_reason: str | None = state.get("interrupt_reason")
    if interrupt_reason:
        graph_status = "interrupted"
    elif approval_status == ApprovalStatus.REJECTED.value:
        graph_status = "rejected"
    else:
        graph_status = "completed"

    session.add_graph_run(
        request_id=request_id,
        plan_summary=plan_summary,
        approval_status=approval_status,
        executed_capabilities=executed_capabilities,
        status=graph_status,
        total_latency_ms=total_latency_ms,
    )

    # Save tool-run records (linked via graph_run_request_id)
    for name, result in tool_results.items():
        session.add_tool_run(
            tool=name,
            success=result.get("success", True),
            graph_run_request_id=request_id,
            latency_ms=result.get("latency_ms", 0.0),
            error=result.get("error"),
        )

    # Save the assistant response
    final_text = state.get("final_text", "")
    if final_text:
        session.add_turn("assistant", final_text)

    _latency_ms = round((time.monotonic() - _start) * 1000, 2)
    logger.debug(
        f"[persist] session_id={session.session_id} "
        f"graph_run={request_id} saved latency_ms={_latency_ms}"
    )

    return {"node_latencies": {"persist": _latency_ms}}


def _safe_score(item: dict) -> float:
    """Safely convert an evidence item's score to float.

    The score in an external API result may be a string or None,
    so 0.0 is returned when the conversion fails.
    """
    try:
        return float(item.get("score", 0.0))
    except (ValueError, TypeError):
        return 0.0


# Meta keys to skip when traversing the accumulated context (all top-level)
_CONTEXT_META_KEYS: frozenset[str] = frozenset(
    {
        "session_context",
        "query",
        "query_variants",
        "previous_user_query",
        "previous_assistant_response",
        "recent_tool_summary",
    }
)


def _collect_evidence_items(accumulated: Dict[str, Any]) -> list[dict]:
    """Collect every EvidenceItem dict from the accumulated context.

    Traverses the evidence.items field of each tool result and merges them
    into a single list. Returns at most 10 items, sorted by score in
    descending order.

    Parameters
    ----------
    accumulated : Dict[str, Any]
        Context dict with accumulated tool results.

    Returns
    -------
    list[dict]
        List of dicts in EvidenceItem.to_dict() form.
    """
    items: list[dict] = []
    for key, payload in accumulated.items():
        if key in _CONTEXT_META_KEYS:
            continue
        if not isinstance(payload, dict):
            continue
        ev = payload.get("evidence")
        if isinstance(ev, dict) and ev.get("items"):
            for item in ev["items"]:
                if isinstance(item, dict):
                    items.append(item)
    # Descending by score, max 10 items - external values, so convert defensively via _safe_score
    items.sort(key=_safe_score, reverse=True)
    return items[:10]


def _extract_final_text(accumulated: Dict[str, Any], task_type: str) -> str:
    """Synthesize the final text from the tool results.

    Inherits the legacy AgentLoop._extract_final_text(), but branches on
    task_type.

    For the append_evidence task type, the evidence section is appended to
    the existing answer (previous_assistant_response) to reinforce it.

    Parameters
    ----------
    accumulated : Dict[str, Any]
        Context dict with accumulated tool results.
    task_type : str
        TaskType.value (e.g. "draft_response").

    Returns
    -------
    str
        Final response text.
    """
    # append_evidence: append an evidence section to the existing answer
    if task_type == "append_evidence":
        previous_draft = str(accumulated.get("previous_assistant_response", "")).strip()
        evidence_section = _build_evidence_section(accumulated)
        if previous_draft and evidence_section:
            return f"{previous_draft}\n\n{evidence_section}"
        if evidence_section:
            return evidence_section
        if previous_draft:
            return previous_draft

    # 1. Use direct text from append_evidence or draft_civil_response when present
    for key in ("append_evidence", "draft_civil_response"):
        payload = accumulated.get(key, {})
        if isinstance(payload, dict) and payload.get("text"):
            return str(payload["text"])

    # 2. Scan all accumulated results for text
    for key, payload in accumulated.items():
        if key in ("session_context", "query"):
            continue
        if isinstance(payload, dict) and payload.get("text"):
            return str(payload["text"])

    # 3. Combine individual results - prefer the evidence field when present
    parts: list[str] = []

    # Build a reference list from the evidence field (no source-specific branching)
    all_evidence_items: list[dict] = []
    for key, payload in accumulated.items():
        if key in ("session_context", "query"):
            continue
        if isinstance(payload, dict):
            ev = payload.get("evidence")
            if isinstance(ev, dict) and ev.get("items"):
                all_evidence_items.extend(ev["items"])

    if all_evidence_items:
        lines = ["[References]"]
        for item in all_evidence_items[:5]:
            source_type = item.get("source_type", "")
            title = item.get("title", "")
            excerpt = item.get("excerpt", "")[:120]
            label = (
                "[local]" if source_type == "rag" else "[external]" if source_type == "api" else "[generated]"
            )
            if title:
                lines.append(f"- {label} {title}: {excerpt}")
            elif excerpt:
                lines.append(f"- {label} {excerpt}")
        if len(lines) > 1:
            parts.append("\n".join(lines))

    # Legacy fallback when there is no evidence
    if not parts:
        rag_data = accumulated.get("rag_search", {})
        if isinstance(rag_data, dict) and rag_data.get("results"):
            lines = ["[Local document evidence]"]
            for item in rag_data["results"][:3]:
                title = item.get("title", "")
                content = str(item.get("content", ""))[:120]
                lines.append(f"- {title}: {content}")
            parts.append("\n".join(lines))

        api_data = accumulated.get("api_lookup", {})
        if isinstance(api_data, dict) and api_data.get("context_text"):
            parts.append(api_data["context_text"])

    return "\n\n".join(parts) if parts else "The request could not be processed."


def _build_evidence_section(accumulated: Dict[str, Any]) -> str:
    """Build the evidence-section text from the accumulated context.

    Uses the direct text from the append_evidence capability when present;
    otherwise builds structured text from the evidence items.

    Contract:
    - This function returns the evidence section ONLY. It must not include
      the existing answer (previous_draft).
    - The caller (_extract_final_text) merges it with previous_draft.
    - AppendEvidenceCapability.execute()'s text field must likewise contain
      only the evidence section. (Putting a full answer that includes the
      existing draft into text would duplicate it in _extract_final_text.)

    Parameters
    ----------
    accumulated : Dict[str, Any]
        Context dict with accumulated tool results.

    Returns
    -------
    str
        Evidence-section text; an empty string when there is no evidence.
    """
    # Prefer the text produced directly by the append_evidence capability.
    # That text must contain only the evidence section (no existing answer).
    ae_payload = accumulated.get("append_evidence", {})
    if isinstance(ae_payload, dict) and ae_payload.get("text"):
        return str(ae_payload["text"])

    # Build structured text from the evidence items
    items = _collect_evidence_items(accumulated)
    if not items:
        return ""

    lines = ["[References]"]
    for item in items[:5]:
        source_type = item.get("source_type", "")
        title = item.get("title", "")
        excerpt = item.get("excerpt", "")[:120]
        label = "[local]" if source_type == "rag" else "[external]" if source_type == "api" else "[generated]"
        if title:
            lines.append(f"- {label} {title}: {excerpt}")
        elif excerpt:
            lines.append(f"- {label} {excerpt}")

    return "\n".join(lines) if len(lines) > 1 else ""