import asyncio
import json
import os
import re
import sqlite3
import time
import uuid
from contextlib import asynccontextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, List, Optional

from fastapi import Depends, FastAPI, HTTPException, Request, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.security import APIKeyHeader
from loguru import logger

try:
    import httpx as _httpx
except ImportError:
    _httpx = None

from .adapter_registry import AdapterRegistry
from .agent_loop import AgentLoop, AgentTrace
from .agent_manager import AgentManager
from .feature_flags import FeatureFlags
from .runtime_config import RuntimeConfig
from .schemas import (
    AgentRunRequest,
    AgentRunResponse,
    AgentTraceSchema,
    GenerateCivilResponseRequest,
    GenerateCivilResponseResponse,
    GenerateRequest,
    GenerateResponse,
    ToolResultSchema,
)
from .session_context import SessionContext, SessionStore
from .tool_router import ToolType, tool_name

SKIP_MODEL_LOAD = os.getenv("SKIP_MODEL_LOAD", "false").lower() in ("true", "1", "yes")


async def _noop_tool(query: str, context: dict, session: Any) -> dict:
    """No-op tool used as a fallback for build_all_tools."""
    return {"success": False, "error": "tool was not initialized"}

try:
    from slowapi import Limiter
    from slowapi.middleware import SlowAPIMiddleware
    from slowapi.util import get_remote_address

    limiter = Limiter(key_func=get_remote_address)
    _RATE_LIMIT_AVAILABLE = True
except ImportError:
    limiter = None
    _RATE_LIMIT_AVAILABLE = False

_API_KEY = os.getenv("API_KEY")
_api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)


async def verify_api_key(api_key: str = Security(_api_key_header)):
    if _API_KEY is None:
        return
    if api_key != _API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API key.")
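

# A minimal sketch of the API-key contract above: when the API_KEY environment
# variable is set, protected endpoints expect the same value in the X-API-Key
# header and answer 401 otherwise. The base URL, port, and key here are
# hypothetical; this helper is documentation only and is never called.
async def _demo_api_key_call() -> None:
    if _httpx is None:
        return
    async with _httpx.AsyncClient(base_url="http://localhost:9000") as client:
        resp = await client.post(
            "/v1/chat/completions",
            headers={"X-API-Key": "my-secret-key"},
            json={"messages": [{"role": "user", "content": "ping"}]},
        )
        resp.raise_for_status()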

runtime_config = RuntimeConfig.from_env()
runtime_config.log_summary()
MODEL_PATH = runtime_config.model.model_path
DATA_PATH = runtime_config.paths.data_path
INDEX_PATH = runtime_config.paths.index_path
GPU_UTILIZATION = runtime_config.gpu_utilization
MAX_MODEL_LEN = runtime_config.max_model_len
TRUST_REMOTE_CODE = runtime_config.model.trust_remote_code
_PROJECT_ROOT = str(Path(__file__).resolve().parent.parent.parent)
AGENTS_DIR = runtime_config.paths.agents_dir


@dataclass
class SamplingParams:
    """Sampling parameters for the vLLM HTTP API; works without importing vLLM itself."""

    max_tokens: int = 512
    temperature: float = 0.7
    top_p: float = 1.0
    stop: Optional[list] = None
    repetition_penalty: float = 1.0


@dataclass
class PreparedGeneration:
    prompt: str
    sampling_params: SamplingParams


class _VLLMOutputItem:
    """Wraps a single choice of a vLLM HTTP response in the legacy interface."""

    def __init__(self, text: str, finish_reason: str, token_ids: list):
        self.text = text
        self.finish_reason = finish_reason
        self.token_ids = token_ids


class _VLLMHttpResult:
    """Wraps a vLLM HTTP response in the legacy AsyncLLM result interface.

    Existing code accesses ``output.outputs[0].text``, ``output.prompt_token_ids``,
    and so on, so the same attributes are provided here.
    """

    def __init__(self, data: dict):
        self._data = data
        choices = data.get("choices", [])
        usage = data.get("usage", {})
        self.outputs = []
        for choice in choices:
            msg = choice.get("message", {})
            text = msg.get("content", "")
            finish = choice.get("finish_reason", "stop")
            self.outputs.append(
                _VLLMOutputItem(
                    text=text,
                    finish_reason=finish,
                    # Placeholder IDs: downstream code only reads their length.
                    token_ids=list(range(usage.get("completion_tokens", 0))),
                )
            )
        self.prompt_token_ids = list(range(usage.get("prompt_tokens", 0)))
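

# A sketch of how _VLLMHttpResult adapts an OpenAI-style response dict to the
# legacy AsyncLLM result shape consumed elsewhere in this module. The payload
# is made up; only the field names follow the OpenAI chat schema.
def _demo_vllm_http_result() -> None:
    sample = {
        "choices": [
            {"message": {"role": "assistant", "content": "answer"}, "finish_reason": "stop"}
        ],
        "usage": {"prompt_tokens": 12, "completion_tokens": 3},
    }
    result = _VLLMHttpResult(sample)
    assert result.outputs[0].text == "answer"
    assert len(result.prompt_token_ids) == 12  # placeholder IDs carry only length
    assert len(result.outputs[0].token_ids) == 3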

def _extract_approval_request(graph_state: Any) -> Any:
    """Extract the approval payload from a LangGraph interrupt state."""
    if not graph_state or not getattr(graph_state, "tasks", None):
        return None
    task = graph_state.tasks[0]
    if not getattr(task, "interrupts", None):
        return None
    return task.interrupts[0].value
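

# A sketch of the state shape _extract_approval_request expects: a snapshot
# whose first task carries at least one interrupt. Built with SimpleNamespace
# purely for illustration; real snapshots come from graph.aget_state().
def _demo_extract_approval_request() -> None:
    from types import SimpleNamespace

    interrupt = SimpleNamespace(value={"prompt": "Run tool X?"})
    state = SimpleNamespace(tasks=[SimpleNamespace(interrupts=[interrupt])])
    assert _extract_approval_request(state) == {"prompt": "Run tool X?"}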

class vLLMEngineManager:
    """Local runtime manager for the GovOn Shell MVP.

    vLLM runs as an OpenAI-compatible server in a separate process
    (entrypoint.sh); this class calls the vLLM HTTP API via httpx.
    """

    def __init__(self):
        self._vllm_base_url = f"http://localhost:{os.getenv('VLLM_PORT', '8000')}"
        self._http_client: Optional[Any] = None
        self.feature_flags = FeatureFlags.from_env()
        self.session_store = SessionStore()
        self.agent_manager = AgentManager(AGENTS_DIR)
        self.agent_loop: Optional[AgentLoop] = None
        self.graph = None  # LangGraph CompiledGraph (for the v2 endpoints)
        self._checkpointer_ctx = None  # AsyncSqliteSaver context manager (owned by lifespan)
        self._sync_checkpointer_conn = None  # sqlite3 connection for SqliteSaver (leak prevention)
        self._init_agent_loop()
        # _init_graph() is called from lifespan() to avoid running at module load time.

    async def initialize(self):
        if SKIP_MODEL_LOAD:
            logger.info("SKIP_MODEL_LOAD=true: skipping model and index loading.")
            return
        # The vLLM server is already started by entrypoint.sh; only run a health check.
        logger.info(f"Checking vLLM server connection: {self._vllm_base_url}")
        if _httpx is None:
            raise RuntimeError("httpx is not installed. pip install httpx")
        self._http_client = _httpx.AsyncClient(
            base_url=self._vllm_base_url,
            timeout=_httpx.Timeout(300.0, connect=30.0),
        )
        # vLLM server health check (entrypoint.sh already verified it; double-check here).
        for attempt in range(10):
            try:
                resp = await self._http_client.get("/health")
                if resp.status_code == 200:
                    logger.info("Connected to vLLM server")
                    return
            except Exception:
                pass
            logger.debug(f"Waiting for vLLM server... ({attempt + 1}/10)")
            await asyncio.sleep(3)
        raise RuntimeError(f"Could not connect to vLLM server: {self._vllm_base_url}")

    def _escape_special_tokens(self, text: str) -> str:
        tokens = [
            "[|user|]",
            "[|assistant|]",
            "[|system|]",
            "[|endofturn|]",
            "<thought>",
            "</thought>",
        ]
        for token in tokens:
            text = text.replace(
                token,
                token.replace("[", "\\[")
                .replace("]", "\\]")
                .replace("<", "\\<")
                .replace(">", "\\>"),
            )
        return text

    @staticmethod
    def _strip_thought_blocks(text: str) -> str:
        # Remove both <thought>...</thought> (legacy) and <think>...</think>
        # (EXAONE-4.0 reasoning mode) blocks.
        text = re.sub(r"<thought>.*?</thought>\s*", "", text, flags=re.DOTALL)
        text = re.sub(r"<think>.*?</think>\s*", "", text, flags=re.DOTALL)
        return text.strip()
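
    # Illustrative behavior on a hypothetical reasoning-mode output:
    #   _strip_thought_blocks("<think>plan the reply...</think>Final answer")
    #   -> "Final answer"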

    def _build_persona_prompt(self, agent_name: str, user_message: str) -> str:
        if self.agent_manager and self.agent_manager.get_agent(agent_name):
            return self.agent_manager.build_prompt(agent_name, user_message)
        return user_message

    def _extract_query(self, prompt: str) -> str:
        user_match = re.search(r"\[\|user\|\](.*?)\[\|endofturn\|\]", prompt, re.DOTALL)
        if user_match:
            user_block = user_match.group(1)
            complaint_match = re.search(r"๋ฏผ์\s*๋ด์ฉ\s*:\s*(.+)", user_block, re.DOTALL)
            if complaint_match:
                return complaint_match.group(1).strip()
            return user_block.strip()
        return prompt

    @staticmethod
    def _is_evidence_request(query: str) -> bool:
        return any(token in query for token in ("๊ทผ๊ฑฐ", "์ถ์ฒ", "์", "์ด์ ", "๋งํฌ"))

    @staticmethod
    def _is_revision_request(query: str) -> bool:
        return any(token in query for token in ("๋ค์", "์์ ", "๊ณ ์ณ", "์ ์ค", "๊ณต์", "๋ณด๊ฐ"))

    def _latest_prior_turns(
        self,
        session: SessionContext,
        current_query: str,
    ) -> tuple[Optional[str], Optional[str]]:
        turns = list(session.recent_history)
        if turns and turns[-1].role == "user" and turns[-1].content == current_query:
            turns = turns[:-1]
        previous_user = next(
            (turn.content for turn in reversed(turns) if turn.role == "user"), None
        )
        previous_assistant = next(
            (turn.content for turn in reversed(turns) if turn.role == "assistant"),
            None,
        )
        return previous_user, previous_assistant

    def _build_working_query(self, query: str, session: SessionContext) -> str:
        query = query.strip()
        if not query:
            return query
        if not (self._is_evidence_request(query) or self._is_revision_request(query)):
            return query
        previous_user, previous_assistant = self._latest_prior_turns(session, query)
        parts: List[str] = []
        if previous_user:
            parts.append(f"์๋ ์์ฒญ: {previous_user}")
        if previous_assistant:
            parts.append(f"์ด์ ๋ต๋ณ: {previous_assistant[:600]}")
        if self._is_revision_request(query):
            parts.append(f"์์ ์์ฒญ: {query}")
        return "\n\n".join(parts) if parts else query

    @staticmethod
    def _format_evidence_items(evidence_dict: Dict[str, Any]) -> str:
        """Render an EvidenceEnvelope dict into a source-list text block.

        With EvidenceItem present, a single formatter handles every entry,
        with no source-specific branching.
        """
        items = evidence_dict.get("items", [])
        if not items:
            return ""
        lines: list[str] = []
        for idx, item in enumerate(items[:10], start=1):
            source_type = item.get("source_type", "")
            title = item.get("title", "")
            link = item.get("link_or_path", "")
            if source_type == "api":
                label = title or "์ธ๋ถ API ๊ฒฐ๊ณผ"
            else:
                label = title or "์์ฑ ์ฐธ์กฐ"
            if link:
                lines.append(f"[{idx}] {label} - {link}")
            else:
                lines.append(f"[{idx}] {label}")
        return "\n".join(lines)

    def _summarize_evidence(
        self,
        api_lookup_data: Dict[str, Any],
    ) -> str:
        # Prefer the EvidenceEnvelope when present.
        evidence = api_lookup_data.get("evidence")
        if isinstance(evidence, dict) and evidence.get("items"):
            lines = ["๊ทผ๊ฑฐ ์์ฝ"]
            api_items = [i for i in evidence["items"] if i.get("source_type") == "api"]
            if api_items:
                titles = ", ".join(i["title"] for i in api_items[:3] if i.get("title"))
                lines.append(
                    f"- ์ธ๋ถ ๋ฏผ์๋ถ์ API์์ ์ ์ฌ ์ฌ๋ก {len(api_items)}๊ฑด์ ํ์ธํ์ต๋๋ค."
                    + (f" ๋ํ ์ฌ๋ก: {titles}" if titles else "")
                )
            if len(lines) == 1:
                lines.append(
                    "- ๋ด๋ถ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ์ถฉ๋ถํ ํ๋ณดํ์ง ๋ชปํด ์ผ๋ฐ ํ์ ์๋ ์์น ๊ธฐ์ค์ผ๋ก ์์ฑํ์ต๋๋ค."
                )
            return "\n".join(lines)
        # Legacy formatter (no EvidenceItem available).
        lines = ["๊ทผ๊ฑฐ ์์ฝ"]
        api_results = api_lookup_data.get("results", [])
        if api_results:
            titles = []
            for item in api_results[:3]:
                title = item.get("title") or item.get("qnaTitle") or item.get("question")
                if title:
                    titles.append(title)
            lines.append(
                f"- ์ธ๋ถ ๋ฏผ์๋ถ์ API์์ ์ ์ฌ ์ฌ๋ก {len(api_results)}๊ฑด์ ํ์ธํ์ต๋๋ค."
                + (f" ๋ํ ์ฌ๋ก: {', '.join(titles)}" if titles else "")
            )
        if len(lines) == 1:
            lines.append(
                "- ๋ด๋ถ ๊ฒ์ ๊ฒฐ๊ณผ๋ฅผ ์ถฉ๋ถํ ํ๋ณดํ์ง ๋ชปํด ์ผ๋ฐ ํ์ ์๋ ์์น ๊ธฐ์ค์ผ๋ก ์์ฑํ์ต๋๋ค."
            )
        return "\n".join(lines)

    @staticmethod
    def _api_source_line(index: int, item: Dict[str, Any]) -> str:
        title = item.get("title") or item.get("qnaTitle") or item.get("question") or "์ธ๋ถ API ๊ฒฐ๊ณผ"
        url = item.get("url") or item.get("detailUrl") or ""
        if url:
            return f"[{index}] {title} - {url}"
        return f"[{index}] {title}"

    def _build_evidence_section(
        self,
        session: SessionContext,
        current_query: str,
        api_data: Dict[str, Any],
    ) -> str:
        _, previous_answer = self._latest_prior_turns(session, current_query)
        lines = ["๊ทผ๊ฑฐ/์ถ์ฒ"]
        cursor = 1
        # Prefer the EvidenceEnvelope and its single formatter when present.
        api_evidence = api_data.get("evidence")
        if api_evidence and isinstance(api_evidence, dict) and api_evidence.get("items"):
            for item in api_evidence["items"][:5]:
                title = item.get("title", "") or "์ธ๋ถ API ๊ฒฐ๊ณผ"
                link = item.get("link_or_path", "")
                if link:
                    lines.append(f"[{cursor}] {title} - {link}")
                else:
                    lines.append(f"[{cursor}] {title}")
                cursor += 1
        else:
            # Legacy API formatter.
            api_items = api_data.get("citations") or api_data.get("results") or []
            for item in api_items[:5]:
                lines.append(self._api_source_line(cursor, item))
                cursor += 1
        if cursor == 1:
            lines.append("- ๊ฒ์ ๊ฐ๋ฅํ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค.")
        section = "\n".join(lines)
        if previous_answer:
            return f"{previous_answer}\n\n{section}"
        return section

    async def _prepare_civil_response_generation(
        self,
        request: GenerateCivilResponseRequest,
        flags: Optional[FeatureFlags] = None,
        external_cases: Optional[List[dict]] = None,
    ) -> PreparedGeneration:
        gen_defaults = runtime_config.generation
        safe_message = self._escape_special_tokens(self._extract_query(request.prompt))
        user_content = f"๋ค์ ๋ฏผ์์ ๋ํ ๋ต๋ณ์ ์์ฑํด ์ฃผ์ธ์.\n\n{safe_message}"
        prompt = self._build_persona_prompt("draft_response", user_content)
        sampling_params = SamplingParams(
            temperature=request.temperature,
            top_p=request.top_p,
            max_tokens=request.max_tokens,
            stop=request.stop or gen_defaults.stop_sequences,
            repetition_penalty=gen_defaults.repetition_penalty,
        )
        return PreparedGeneration(
            prompt=prompt,
            sampling_params=sampling_params,
        )

    async def _prepare_draft_only(
        self,
        request: GenerateCivilResponseRequest,
        flags: Optional[FeatureFlags] = None,
    ) -> PreparedGeneration:
        """For draft generation with LoRA: build the prompt from the query alone.

        Wraps the user query in the persona prompt and returns it.
        """
        gen_defaults = runtime_config.generation
        safe_message = self._escape_special_tokens(self._extract_query(request.prompt))
        # Training data format: user = instruction + "\n\n" + input
        user_content = f"๋ค์ ๋ฏผ์์ ๋ํ ๋ต๋ณ์ ์์ฑํด ์ฃผ์ธ์.\n\n{safe_message}"
        prompt = self._build_persona_prompt("draft_response", user_content)
        sampling_params = SamplingParams(
            temperature=(
                request.temperature if request.temperature is not None else gen_defaults.temperature
            ),
            top_p=request.top_p if request.top_p is not None else gen_defaults.top_p,
            max_tokens=request.max_tokens or gen_defaults.max_tokens,
            stop=request.stop or gen_defaults.stop_sequences,
            repetition_penalty=gen_defaults.repetition_penalty,
        )
        return PreparedGeneration(
            prompt=prompt,
            sampling_params=sampling_params,
        )

    async def synthesize_final(
        self,
        draft_text: str,
        evidence_items: list,
        query: str,
        adapter_name: str = "public_admin",
    ) -> str:
        """Merge the draft and tool results into the final answer using the base model.

        The LoRA adapter is specialized for the training format (question to
        answer), so the base model is the better fit for a general task such as
        merging a draft with evidence.
        """
        safe_query = self._escape_special_tokens(query[:400])
        safe_draft = self._escape_special_tokens(draft_text[:800])
        # Assemble the evidence text.
        evidence_text = ""
        for item in evidence_items[:5]:
            source_type = item.get("source_type", "")
            title = item.get("title", "")
            excerpt = item.get("excerpt", "")[:200]
            label = "[์ธ๋ถ]" if source_type == "api" else "[์์ฑ]"
            if title or excerpt:
                evidence_text += f"- {label} {title}: {excerpt}\n"
        if not evidence_text.strip():
            evidence_text = "(๊ฒ์ ๊ทผ๊ฑฐ ์์)"
        # General-purpose synthesis prompt for the base model.
        synthesis_prompt = (
            "[|system|]๋น์ ์ ๋ฏผ์ ๋ต๋ณ์ ๋ณด๊ฐํ๋ ์ ๋ฌธ๊ฐ์ ๋๋ค. "
            "์ด์๊ณผ ์ฐธ๊ณ ๊ทผ๊ฑฐ๋ฅผ ๊ฒฐํฉํ์ฌ ์ ํํ๊ณ ๊ณต๊ฐ์ ์ธ ์ต์ข ๋ต๋ณ์ ์์ฑํ์ธ์. "
            "๋ฒ์ ๊ทผ๊ฑฐ๊ฐ ์์ผ๋ฉด ์ธ์ฉํ๊ณ , ์ ์ฐจ์ ์กฐ์น์ฌํญ์ ๋ช ํํ ํฌํจํ์ธ์."
            "[|endofturn|]\n"
            "[|user|]๋ค์ ์ด์๊ณผ ๊ทผ๊ฑฐ๋ฅผ ๊ฒฐํฉํ์ฌ ์ต์ข ๋ฏผ์ ๋ต๋ณ์ ์์ฑํ์ธ์.\n\n"
            f"[๋ฏผ์ ์ง์]\n{safe_query}\n\n"
            f"[์ด์]\n{safe_draft}\n\n"
            f"[์ฐธ๊ณ ๊ทผ๊ฑฐ]\n{evidence_text}"
            "[|endofturn|]\n[|assistant|]"
        )
        # Use the base model (no LoRA): synthesis is a general-purpose task.
        sampling_params = SamplingParams(
            max_tokens=768,
            temperature=0.6,
            top_p=0.9,
            stop=["[|endofturn|]"],
        )
        request_id = str(uuid.uuid4())
        try:
            output = await self._run_engine(
                synthesis_prompt, sampling_params, request_id, lora_request=None
            )
        except Exception as exc:
            logger.warning(f"[synthesize_final] synthesis failed: {exc}")
            return draft_text
        if output is None or not output.outputs:
            return draft_text
        return self._strip_thought_blocks(output.outputs[0].text)

    async def _run_engine(
        self,
        prompt: str,
        sampling_params: SamplingParams,
        request_id: str,
        lora_request=None,
    ):
        """Generate text through the vLLM OpenAI-compatible HTTP API."""
        if self._http_client is None:
            return None
        # Convert the EXAONE chat-template prompt into messages.
        messages = self._prompt_to_messages(prompt)
        body: Dict[str, Any] = {
            "model": MODEL_PATH,
            "messages": messages,
            "max_tokens": sampling_params.max_tokens,
            "temperature": sampling_params.temperature,
            "stream": False,
        }
        if sampling_params.top_p is not None and sampling_params.top_p < 1.0:
            body["top_p"] = sampling_params.top_p
        if sampling_params.stop:
            body["stop"] = list(sampling_params.stop)
        if sampling_params.repetition_penalty and sampling_params.repetition_penalty != 1.0:
            body["repetition_penalty"] = sampling_params.repetition_penalty
        # LoRA adapter support.
        if lora_request is not None:
            body["model"] = lora_request.lora_name
        try:
            resp = await self._http_client.post("/v1/chat/completions", json=body)
            resp.raise_for_status()
            data = resp.json()
        except Exception as exc:
            logger.error(f"vLLM HTTP call failed: {exc}")
            return None
        # Wrap the OpenAI response in an object compatible with the legacy interface.
        return _VLLMHttpResult(data)

    @staticmethod
    def _prompt_to_messages(prompt: str) -> list:
        """Convert an EXAONE chat-template prompt into OpenAI messages."""
        messages = []
        # Parse [|system|]...[|endofturn|], [|user|]...[|endofturn|], [|assistant|]...
        parts = re.split(r"\[\\?\|(\w+)\\?\|]", prompt)
        role = None
        for part in parts:
            if part in ("system", "user", "assistant"):
                role = part
            elif role and part.strip():
                content = part.replace("[|endofturn|]", "").strip()
                if content:
                    messages.append({"role": role, "content": content})
                role = None
        if not messages:
            messages = [{"role": "user", "content": prompt}]
        return messages
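
    # Illustrative round trip for a hypothetical EXAONE-format prompt:
    #   _prompt_to_messages("[|system|]Be brief.[|endofturn|][|user|]Hi[|endofturn|][|assistant|]")
    #   -> [{"role": "system", "content": "Be brief."},
    #       {"role": "user", "content": "Hi"}]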

    async def generate(
        self,
        request: GenerateRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
    ) -> Any:
        return await self.generate_civil_response(request, request_id, flags)

    async def generate_civil_response(
        self,
        request: GenerateCivilResponseRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
        external_cases: Optional[List[dict]] = None,
        lora_request=None,
    ) -> Any:
        prepared = await self._prepare_civil_response_generation(request, flags, external_cases)
        return await self._run_engine(
            prepared.prompt, prepared.sampling_params, request_id, lora_request=lora_request
        )

    async def generate_stream(
        self,
        request: GenerateRequest,
        request_id: str,
        flags: Optional[FeatureFlags] = None,
    ) -> Any:
        prepared = await self._prepare_civil_response_generation(request, flags)
        if self._http_client is None:
            raise RuntimeError("Not connected to the vLLM server.")
        messages = self._prompt_to_messages(prepared.prompt)
        body = {
            "model": MODEL_PATH,
            "messages": messages,
            "max_tokens": prepared.sampling_params.max_tokens,
            "temperature": prepared.sampling_params.temperature,
            "stream": True,
        }
        if prepared.sampling_params.stop:
            body["stop"] = list(prepared.sampling_params.stop)
        return self._http_client.stream("POST", "/v1/chat/completions", json=body)

    def _init_agent_loop(self) -> None:
        from src.inference.actions.data_go_kr import MinwonAnalysisAction

        engine_ref = self
        minwon_action = MinwonAnalysisAction()

        async def _api_lookup_tool(query: str, context: dict, session: SessionContext) -> dict:
            working_query = query.strip()
            payload = await minwon_action.fetch_similar_cases(
                working_query,
                {
                    **context,
                    "session_context": session.build_context_summary(),
                },
            )
            results = payload["results"] or []
            return {
                "query": payload["query"],
                "count": len(results),
                "results": results,
                "context_text": payload["context_text"],
                "citations": [citation.to_dict() for citation in payload["citations"]],
                "source": "data.go.kr",
            }

        async def _draft_response_tool(
            query: str,
            context: dict,
            session: SessionContext,
        ) -> dict:
            working_query = engine_ref._build_working_query(query, session)
            # LoRA-first: generate the draft from the query alone.
            adapter_name = context.get("adapter") if context else None
            if not adapter_name:
                adapter_name = "public_admin"
            _adapter_reg = AdapterRegistry.get_instance()
            lora_req = _adapter_reg.get_lora_request(adapter_name)
            gen_request = GenerateCivilResponseRequest(
                prompt=working_query,
                max_tokens=2048,
                temperature=0.7,
            )
            request_id = str(uuid.uuid4())
            prepared = await engine_ref._prepare_draft_only(gen_request)
            final_output = await engine_ref._run_engine(
                prepared.prompt, prepared.sampling_params, request_id, lora_request=lora_req
            )
            if final_output is None or not final_output.outputs:
                return {
                    "text": "",
                    "draft_text": "",
                    "success": False,
                    "error": "Civil response draft generation failed",
                    "results": [],
                    "context_text": "",
                }
            draft_text = engine_ref._strip_thought_blocks(final_output.outputs[0].text)
            return {
                "text": draft_text,
                "draft_text": draft_text,
                "success": True,
                "results": [],
                "context_text": draft_text,
                "prompt_tokens": len(final_output.prompt_token_ids),
                "completion_tokens": len(final_output.outputs[0].token_ids),
            }

        tool_registry = {
            ToolType.API_LOOKUP: _api_lookup_tool,
            "draft_response": _draft_response_tool,
        }
        self.agent_loop = AgentLoop(tool_registry=tool_registry)

    def _build_langgraph_tools(self) -> list:
        """Build the tool list for the LangGraph ToolNode.

        Uses build_all_tools() to return a list of StructuredTool objects,
        handing over the existing closures extracted from the AgentLoop
        tool_registry.
        """
        from src.inference.graph.tools import build_all_tools

        if self.agent_loop is None:
            return build_all_tools(
                api_lookup_action=self._get_api_lookup_action(),
            )
        # Extract the existing closures from the AgentLoop tool_registry.
        raw_tools = {
            str(k.value if hasattr(k, "value") else k): v for k, v in self.agent_loop._tools.items()
        }
        return build_all_tools(
            api_lookup_action=self._get_api_lookup_action(),
            draft_response_fn=raw_tools.get("draft_response"),
        )

    def _get_api_lookup_action(self) -> Any:
        """Extract the MinwonAnalysisAction behind the api_lookup registered on the AgentLoop."""
        if self.agent_loop is None:
            return None
        tool_fn = self.agent_loop._tools.get(ToolType.API_LOOKUP)
        # If it is an ApiLookupCapability, extract the action directly.
        if hasattr(tool_fn, "_action"):
            return tool_fn._action
        # If it is a closure, the action cannot be extracted, so create a fresh
        # one (matching what _init_agent_loop does).
        try:
            from src.inference.actions.data_go_kr import MinwonAnalysisAction

            return MinwonAnalysisAction()
        except Exception:
            return None

    def _init_graph_with_async_checkpointer(self, checkpointer: object) -> None:
        """Rebuild the graph once the AsyncSqliteSaver is ready in lifespan."""
        self._init_graph(checkpointer=checkpointer)

    def _init_graph(self, checkpointer: Optional[object] = None) -> None:
        """Initialize the LangGraph StateGraph.

        v4 architecture: ReAct + ToolNode. The LLM decides tool calls
        autonomously; no static planner/executor is used.

        Parameters
        ----------
        checkpointer : optional
            Externally injected LangGraph checkpointer.
            If None, a SqliteSaver (synchronous sqlite3) is attempted, falling
            back to MemorySaver on import failure.
            The SqliteSaver DB is created as ``langgraph_checkpoints.db`` in the
            same directory as the SessionStore DB (separation of concerns).
        """
        try:
            from src.inference.graph.builder import build_govon_graph
        except ImportError as exc:
            logger.warning(f"LangGraph graph initialization failed (import error): {exc}")
            return
        tools = self._build_langgraph_tools()
        # Configure the LLM instance.
        if SKIP_MODEL_LOAD:
            # CI/test environment: no LLM available, skip graph initialization.
            logger.info("SKIP_MODEL_LOAD=true: skipping LangGraph graph initialization")
            return
        elif os.getenv("LANGGRAPH_MODEL_BASE_URL"):
            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI(
                base_url=os.environ["LANGGRAPH_MODEL_BASE_URL"],
                api_key=os.getenv("LANGGRAPH_MODEL_API_KEY", "EMPTY"),
                model=os.getenv("LANGGRAPH_PLANNER_MODEL", runtime_config.model.model_path),
                temperature=0.0,
                max_tokens=1024,
            )
        else:
            # Production environment: use the vLLM OpenAI-compatible endpoint.
            from langchain_openai import ChatOpenAI

            vllm_port = os.getenv("VLLM_PORT", "8000")
            llm = ChatOpenAI(
                base_url=f"http://localhost:{vllm_port}/v1",
                api_key="EMPTY",
                model=runtime_config.model.model_path,
                temperature=0.0,
                max_tokens=1024,
            )
        # If no checkpointer was injected, try a SqliteSaver.
        if checkpointer is None:
            checkpointer, conn = _build_sync_sqlite_checkpointer(self.session_store.db_path)
            if self._sync_checkpointer_conn is not None:
                try:
                    self._sync_checkpointer_conn.close()
                except Exception:
                    pass
            self._sync_checkpointer_conn = conn
        self.graph = build_govon_graph(
            llm=llm,
            tools=tools,
            session_store=self.session_store,
            checkpointer=checkpointer,
        )
        logger.info("LangGraph graph initialized")

def _build_sync_sqlite_checkpointer(
    session_db_path: str,
) -> tuple:
    """Return a SqliteSaver (synchronous) or a MemorySaver (fallback).

    The SQLite DB for the LangGraph checkpointer is created as a separate file,
    ``langgraph_checkpoints.db``, in the same directory as the SessionStore's
    sessions.sqlite3. Keeping the two DBs apart clearly separates concerns
    (session metadata vs. graph checkpoints).

    Unlike MemorySaver, SqliteSaver restores interrupt state from SQLite after
    a process restart, so it is restart-safe.

    Parameters
    ----------
    session_db_path : str
        Path to the sessions.sqlite3 file in use by the SessionStore.
        langgraph_checkpoints.db is created in this path's parent directory.

    Returns
    -------
    tuple[SqliteSaver | MemorySaver, sqlite3.Connection | None]
        A (checkpointer, conn) tuple.
        With SqliteSaver, conn is the open sqlite3.Connection, which the caller
        must close at an appropriate time.
        With the MemorySaver fallback, conn is None.
    """
    cp_db_path = str(Path(session_db_path).parent / "langgraph_checkpoints.db")
    try:
        from langgraph.checkpoint.sqlite import SqliteSaver

        conn = sqlite3.connect(cp_db_path, check_same_thread=False)
        saver = SqliteSaver(conn)
        logger.info(f"LangGraph checkpointer: SqliteSaver ({cp_db_path})")
        return saver, conn
    except ImportError:
        logger.warning(
            "langgraph-checkpoint-sqlite not installed; falling back to MemorySaver. "
            "Interrupt state will be lost on process restart."
        )
        from langgraph.checkpoint.memory import MemorySaver

        return MemorySaver(), None

manager = vLLMEngineManager()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: model/index initialization and AsyncSqliteSaver upgrade.

    During startup, the graph is rebuilt if an AsyncSqliteSaver is available.
    The AsyncSqliteSaver is managed as an async context manager and cleaned up
    on shutdown. If the AsyncSqliteSaver import fails, the SqliteSaver (or
    MemorySaver fallback) already configured by _init_graph stays in place.
    """
    await manager.initialize()
    # Initialize the graph after connecting to the vLLM server
    # (runs in lifespan rather than at module load time).
    manager._init_graph()
    # Try to rebuild the graph with an AsyncSqliteSaver (better async performance).
    async_cp_db = str(Path(manager.session_store.db_path).parent / "langgraph_checkpoints.db")
    try:
        from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver

        async with AsyncSqliteSaver.from_conn_string(async_cp_db) as async_saver:
            # Close the connection held by the synchronous SqliteSaver to prevent a leak.
            if manager._sync_checkpointer_conn is not None:
                try:
                    manager._sync_checkpointer_conn.close()
                except Exception:
                    pass
                manager._sync_checkpointer_conn = None
            manager._checkpointer_ctx = async_saver
            manager._init_graph_with_async_checkpointer(async_saver)
            logger.info(f"LangGraph checkpointer: AsyncSqliteSaver ({async_cp_db})")
            yield
            manager._checkpointer_ctx = None
    except ImportError:
        logger.info("AsyncSqliteSaver not installed; running with SqliteSaver (sync) or MemorySaver.")
        yield

app = FastAPI(
    title="GovOn Local Runtime",
    description="Local FastAPI daemon for the GovOn Agentic Shell MVP.",
    lifespan=lifespan,
)

ALLOWED_ORIGINS = os.getenv("CORS_ORIGINS", "").split(",")
if ALLOWED_ORIGINS and ALLOWED_ORIGINS[0]:
    app.add_middleware(
        CORSMiddleware,
        allow_origins=ALLOWED_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

if _RATE_LIMIT_AVAILABLE and limiter is not None:
    app.state.limiter = limiter
    app.add_middleware(SlowAPIMiddleware)

@app.get("/health")  # assumed path
async def health():
    return {
        "status": "healthy",
        "profile": runtime_config.profile.value,
        "model": runtime_config.model.model_path,
        "agents_loaded": manager.agent_manager.list_agents() if manager.agent_manager else [],
        "feature_flags": {
            "model_version": manager.feature_flags.model_version,
        },
        "session_store": {
            "driver": "sqlite",
            "path": manager.session_store.db_path,
        },
    }


def _rate_limit(limit_string: str):
    if _RATE_LIMIT_AVAILABLE and limiter is not None:
        return limiter.limit(limit_string)

    def _noop(func):
        return func

    return _noop


def get_feature_flags(request: Request) -> FeatureFlags:
    header = request.headers.get("X-Feature-Flag")
    return manager.feature_flags.override_from_header(header)

@app.post("/v1/generate/civil-response")  # assumed path
async def generate_civil_response(
    request: GenerateCivilResponseRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if request.stream:
        raise HTTPException(status_code=400, detail="Use /v1/stream for civil response streaming.")
    request_id = str(uuid.uuid4())
    final_output = await manager.generate_civil_response(
        request,
        request_id,
        flags,
    )
    if final_output is None:
        raise HTTPException(status_code=500, detail="Civil response generation failed.")
    return GenerateCivilResponseResponse(
        request_id=request_id,
        complaint_id=request.complaint_id,
        text=manager._strip_thought_blocks(final_output.outputs[0].text),
        prompt_tokens=len(final_output.prompt_token_ids),
        completion_tokens=len(final_output.outputs[0].token_ids),
    )

@app.post("/v1/generate")  # assumed path
async def generate(
    request: GenerateRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if request.stream:
        raise HTTPException(status_code=400, detail="Use /v1/stream for streaming.")
    request_id = str(uuid.uuid4())
    final_output = await manager.generate(request, request_id, flags)
    if final_output is None:
        raise HTTPException(status_code=500, detail="Generation failed.")
    return GenerateResponse(
        request_id=request_id,
        complaint_id=request.complaint_id,
        text=manager._strip_thought_blocks(final_output.outputs[0].text),
        prompt_tokens=len(final_output.prompt_token_ids),
        completion_tokens=len(final_output.outputs[0].token_ids),
    )

@app.post("/v1/chat/completions")
async def chat_completions(
    request: Request,
    _: None = Depends(verify_api_key),
):
    """OpenAI-compatible /v1/chat/completions.

    Generates text by way of the vLLM HTTP API. The v2 ReAct graph connects
    ChatOpenAI directly to the vLLM OpenAI server, so this endpoint exists
    only for v1 compatibility.
    """
    try:
        body = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Invalid JSON body.")
    messages: list[dict] = body.get("messages", [])
    if not messages:
        raise HTTPException(status_code=422, detail="messages must not be empty.")
    try:
        max_tokens = int(body.get("max_tokens", 512))
        temperature = float(body.get("temperature", 0.7))
    except (ValueError, TypeError):
        raise HTTPException(status_code=400, detail="Invalid max_tokens or temperature value.")
    if not (1 <= max_tokens <= runtime_config.max_model_len):
        raise HTTPException(
            status_code=400,
            detail=f"max_tokens must be between 1 and {runtime_config.max_model_len}.",
        )
    if not (0.0 <= temperature <= 2.0):
        raise HTTPException(status_code=400, detail="temperature must be between 0.0 and 2.0.")
    model: str = body.get("model", runtime_config.model.model_path)
    # Convert messages to a prompt (EXAONE chat template format).
    prompt_parts: list[str] = []
    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        if role == "system":
            prompt_parts.append(f"[|system|]{content}[|endofturn|]")
        elif role == "user":
            prompt_parts.append(f"[|user|]{content}[|endofturn|]")
        elif role == "assistant":
            prompt_parts.append(f"[|assistant|]{content}[|endofturn|]")
        else:
            logger.warning(f"chat_completions: ignoring unsupported role: {role!r}")
    prompt_parts.append("[|assistant|]")
    prompt = "\n".join(prompt_parts)
    if manager._http_client is None:
        raise HTTPException(status_code=503, detail="vLLM server not connected.")
    request_id = str(uuid.uuid4())
    logger.info(
        f"chat_completions request_id={request_id} messages={len(messages)} max_tokens={max_tokens}"
    )
    sampling_params = SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        stop=["[|endofturn|]"],
    )
    try:
        final_output = await manager._run_engine(prompt, sampling_params, request_id)
    except Exception as exc:
        logger.error(f"chat_completions generation failed: {exc}")
        raise HTTPException(status_code=500, detail="Generation failed due to internal error.")
    if final_output is None or not final_output.outputs:
        raise HTTPException(status_code=500, detail="Generation failed.")
    output = final_output.outputs[0]
    text = manager._strip_thought_blocks(output.text)
    prompt_tokens = len(final_output.prompt_token_ids)
    completion_tokens = len(output.token_ids)
    vllm_reason = getattr(output, "finish_reason", None)
    finish_reason = "length" if vllm_reason == "length" else "stop"
    return {
        "id": f"chatcmpl-{request_id}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": text},
                "finish_reason": finish_reason,
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
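

# A hypothetical client call against the compatibility endpoint above, shown
# for documentation; the port and key are assumptions and the helper is never
# invoked by the server.
async def _demo_chat_completions_call() -> None:
    if _httpx is None:
        return
    async with _httpx.AsyncClient(base_url="http://localhost:9000") as client:
        resp = await client.post(
            "/v1/chat/completions",
            headers={"X-API-Key": "my-secret-key"},
            json={
                "messages": [{"role": "user", "content": "Draft a short reply."}],
                "max_tokens": 256,
                "temperature": 0.7,
            },
        )
        data = resp.json()
        print(data["choices"][0]["message"]["content"], data["usage"]["total_tokens"])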

@app.post("/v1/stream")
async def stream_generate(
    request: GenerateRequest,
    _: None = Depends(verify_api_key),
    flags: FeatureFlags = Depends(get_feature_flags),
):
    if not request.stream:
        request.stream = True
    request_id = str(uuid.uuid4())
    results_stream = await manager.generate_stream(
        request,
        request_id,
        flags,
    )

    async def stream_results() -> AsyncGenerator[str, None]:
        # generate_stream() returns an httpx stream context manager over the
        # vLLM OpenAI-style SSE response; accumulate the deltas into the
        # cumulative text shape the legacy clients expect.
        accumulated = ""
        async with results_stream as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                payload = line[len("data: "):].strip()
                if payload == "[DONE]":
                    break
                choice = json.loads(payload).get("choices", [{}])[0]
                accumulated += choice.get("delta", {}).get("content") or ""
                finished = choice.get("finish_reason") is not None
                text = manager._strip_thought_blocks(accumulated) if finished else accumulated
                response_obj = {"request_id": request_id, "text": text, "finished": finished}
                yield f"data: {json.dumps(response_obj, ensure_ascii=False)}\n\n"

    return StreamingResponse(stream_results(), media_type="text/event-stream")

def _trace_to_schema(trace: AgentTrace) -> AgentTraceSchema:
    return AgentTraceSchema(
        request_id=trace.request_id,
        session_id=trace.session_id,
        plan=trace.plan_tools,
        plan_reason=trace.plan_reason,
        tool_results=[
            ToolResultSchema(
                tool=tool_name(result.tool),
                success=result.success,
                latency_ms=round(result.latency_ms, 2),
                data=result.data,
                error=result.error,
            )
            for result in trace.tool_results
        ],
        total_latency_ms=round(trace.total_latency_ms, 2),
        error=trace.error,
    )

@app.post("/v1/agent/run")  # assumed path
async def agent_run(
    request: AgentRunRequest,
    _: None = Depends(verify_api_key),
):
    if not manager.agent_loop:
        raise HTTPException(status_code=503, detail="Agent loop is not initialized.")
    if request.stream:
        raise HTTPException(status_code=400, detail="Use /v1/agent/stream for streaming.")
    session = manager.session_store.get_or_create(session_id=request.session_id)
    request_id = str(uuid.uuid4())
    trace = await manager.agent_loop.run(
        query=request.query,
        session=session,
        request_id=request_id,
        force_tools=request.force_tools,
    )
    return AgentRunResponse(
        request_id=request_id,
        session_id=session.session_id,
        text=trace.final_text,
        trace=_trace_to_schema(trace),
    )

@app.post("/v1/agent/stream")
async def agent_stream(
    request: AgentRunRequest,
    _: None = Depends(verify_api_key),
):
    if not manager.agent_loop:
        raise HTTPException(status_code=503, detail="Agent loop is not initialized.")
    session = manager.session_store.get_or_create(session_id=request.session_id)
    request_id = str(uuid.uuid4())

    async def stream_events() -> AsyncGenerator[str, None]:
        async for event in manager.agent_loop.run_stream(
            query=request.query,
            session=session,
            request_id=request_id,
            force_tools=request.force_tools,
        ):
            yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"

    return StreamingResponse(stream_events(), media_type="text/event-stream")

# ---------------------------------------------------------------------------
# v2 endpoints: LangGraph-based agent execution (interrupt/approve pattern)
# ---------------------------------------------------------------------------

@app.post("/v2/agent/stream")
async def v2_agent_stream(
    request: AgentRunRequest,
    _http_request: Request,
    _: None = Depends(verify_api_key),
):
    """SSE streaming execution of the LangGraph-based agent.

    Uses graph.astream() to emit per-node completion events over SSE.

    Event format (one per line: ``data: <JSON>\\n\\n``):
    - node progress: ``{"node": "<name>", "status": "completed", ...}``
    - approval_wait reached:
      ``{"node": "approval_wait", "status": "awaiting_approval",
      "approval_request": {...}, "thread_id": "..."}``
    - error: ``{"node": "error", "status": "error", "error": "..."}``

    Approval flow:
    - On receiving the ``awaiting_approval`` event, the client recognizes that
      the stream has ended and sends approve/reject via ``/v2/agent/approve``.
    """
    if not manager.graph:
        async def _no_graph():
            yield 'data: {"node": "error", "status": "error", "error": "LangGraph graph is not initialized."}\n\n'

        return StreamingResponse(_no_graph(), media_type="text/event-stream")
    from langchain_core.messages import HumanMessage

    thread_id = request.session_id or str(uuid.uuid4())
    session_id = thread_id
    request_id = str(uuid.uuid4())
    config = {"configurable": {"thread_id": thread_id}}
    initial_state = {
        "session_id": session_id,
        "request_id": request_id,
        "messages": [HumanMessage(content=request.query)],
    }
    # If a stale interrupt state remains, resolve it as a rejection (cancel).
    try:
        from langgraph.types import Command

        existing_state = await manager.graph.aget_state(config)
        if existing_state and existing_state.next:
            await manager.graph.ainvoke(
                Command(resume={"approved": False, "cancel": True}),
                config,
            )
    except Exception as clear_exc:
        logger.warning(f"[v2] failed to check/clear interrupt state (ignored): {type(clear_exc).__name__}")

    async def _generate() -> AsyncGenerator[str, None]:
        try:
            async for chunk in manager.graph.astream(initial_state, config, stream_mode="updates"):
                # chunk: {node_name: state_delta}
                for node_name, state_delta in chunk.items():
                    event: dict = {
                        "node": node_name,
                        "status": "completed",
                    }
                    # Include evidence_items in the event once persist completes.
                    # Premise: with stream_mode="updates", state_delta is the node's raw return dict.
                    # The evidence_items schema follows the EvidenceItem.to_dict() fields:
                    #   source_type: "api" | "llm_generated"
                    #   title, excerpt, link_or_path, page, score, provider_meta
                    if node_name == "persist" and isinstance(state_delta, dict):
                        if state_delta.get("final_text"):
                            event["final_text"] = state_delta["final_text"]
                        if state_delta.get("evidence_items"):
                            event["evidence_items"] = state_delta["evidence_items"]
                    # approval_wait: handle both the explicit node name and the
                    # "__interrupt__" chunk that stream_mode="updates" emits when
                    # LangGraph interrupt() is called.
                    if node_name in ("approval_wait", "__interrupt__"):
                        try:
                            graph_state = await manager.graph.aget_state(config)
                            if graph_state.next:
                                event = {
                                    "node": "approval_wait",
                                    "status": "awaiting_approval",
                                    "approval_request": _extract_approval_request(graph_state),
                                    "thread_id": thread_id,
                                    "session_id": session_id,
                                }
                        except Exception as exc:
                            logger.warning(f"[v2/agent/stream] aget_state failed: {exc}")
                            event["node"] = "approval_wait"
                            event["status"] = "awaiting_approval"
                            event["thread_id"] = thread_id
                            event["session_id"] = session_id
                            event["approval_request"] = {
                                "prompt": "Could not load approval details. Proceed via /v2/agent/approve."
                            }
                    yield f"data: {json.dumps(event, ensure_ascii=False)}\n\n"
                    # Stop streaming after awaiting_approval (client must call /v2/agent/approve).
                    if event.get("status") == "awaiting_approval":
                        return
        except Exception as exc:
            logger.error(f"[v2/agent/stream] stream exception: {exc}")
            error_event = {"node": "error", "status": "error", "error": str(exc)}
            yield f"data: {json.dumps(error_event, ensure_ascii=False)}\n\n"

    return StreamingResponse(_generate(), media_type="text/event-stream")
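

# A sketch of a client consuming the SSE stream documented above: read
# line-delimited "data: <JSON>" frames until awaiting_approval (or an error)
# arrives, then hand off to /v2/agent/approve. The base URL and key are
# hypothetical; this helper is never called by the server.
async def _demo_v2_stream_client(query: str) -> None:
    if _httpx is None:
        return
    async with _httpx.AsyncClient(base_url="http://localhost:9000", timeout=None) as client:
        async with client.stream(
            "POST",
            "/v2/agent/stream",
            headers={"X-API-Key": "my-secret-key"},
            json={"query": query},
        ) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                event = json.loads(line[len("data: "):])
                if event.get("status") in ("awaiting_approval", "error"):
                    break  # resume via /v2/agent/approve with event["thread_id"]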

@app.post("/v2/agent/run")
async def v2_agent_run(
    request: AgentRunRequest,
    _http_request: Request,
    _: None = Depends(verify_api_key),
):
    """Run the LangGraph-based agent (phase 1: up to the interrupt).

    Runs the graph and, if it is interrupted at the `approval_wait` node,
    returns `status: awaiting_approval` together with the approval request
    payload. The client must store the returned `thread_id` and send
    approve/reject via `/v2/agent/approve`.

    Session Resume Contract
    -----------------------
    A repeat request with the same session_id follows these rules:
    1. **Waiting on an interrupt**: if the graph is interrupted at
       approval_wait, it does not resume from the current checkpoint; instead
       the new message is *appended* and execution continues (the repeat
       request is handled as a new graph_run). Approve/reject must always go
       through `/v2/agent/approve`.
    2. **Completed graph**: if the graph has reached END (state.next == []),
       a new graph_run starts on the same thread_id. The LangGraph
       checkpointer accumulates prior state under the same thread_id, so
       conversation history is preserved.
    3. **After a process restart**: with SqliteSaver the checkpoint is
       restored from the DB, so interrupt state survives. The client can call
       `/v2/agent/approve` again with the existing thread_id to resume from
       where it left off.

    Note: session_id == thread_id. The two are always kept identical.
    """
    if not manager.graph:
        raise HTTPException(status_code=503, detail="LangGraph graph is not initialized.")
    from langchain_core.messages import HumanMessage

    thread_id = request.session_id or str(uuid.uuid4())
    session_id = thread_id  # use thread_id as session_id (session_id == thread_id invariant)
    request_id = str(uuid.uuid4())
    config = {"configurable": {"thread_id": thread_id}}
    initial_state = {
        "session_id": session_id,
        "request_id": request_id,
        "messages": [HumanMessage(content=request.query)],
    }
    # If a stale interrupt state remains, resolve it as a rejection (cancel).
    try:
        existing_state = await manager.graph.aget_state(config)
        if existing_state and existing_state.next:
            from langgraph.types import Command

            await manager.graph.ainvoke(
                Command(resume={"approved": False, "cancel": True}),
                config,
            )
    except Exception as clear_exc:
        logger.warning(f"[v2] failed to check/clear interrupt state (ignored): {type(clear_exc).__name__}")
    try:
        await manager.graph.ainvoke(initial_state, config)
        # Check for an interrupt.
        graph_state = await manager.graph.aget_state(config)
        if graph_state.next:
            # Waiting on an interrupt: return the approval_request payload to the client.
            return {
                "status": "awaiting_approval",
                "thread_id": thread_id,
                "session_id": session_id,
                "graph_run_id": request_id,
                "approval_request": _extract_approval_request(graph_state),
            }
        # Completed without an interrupt (rejected or error).
        final_state = graph_state.values
        return {
            "status": "completed",
            "thread_id": thread_id,
            "session_id": session_id,
            "graph_run_id": request_id,
            "text": final_state.get("final_text", ""),
            "evidence_items": final_state.get("evidence_items", []),
        }
    except Exception as exc:
        logger.error(f"[v2/agent/run] exception: {exc}")
        # Try to record the graph_run with "error" status.
        try:
            if manager.session_store:
                session = manager.session_store.get_or_create(session_id)
                session.add_graph_run(
                    request_id=request_id,
                    plan_summary=f"[error] {exc}",
                    approval_status="",
                    executed_capabilities=[],
                    status="error",
                    total_latency_ms=0.0,
                )
        except Exception as persist_exc:
            logger.warning(f"[v2/agent/run] error persist failed: {persist_exc}")
        logger.exception(f"[v2/agent/run] request handling failed: {exc}")
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "thread_id": thread_id,
                "session_id": session_id,
                "graph_run_id": request_id,
                "error": "An internal error occurred while processing the request.",
            },
        )
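

# A sketch of the two-phase contract above: run until awaiting_approval, then
# resume the same thread via /v2/agent/approve. The base URL and key are
# hypothetical; this helper exists for documentation and is never invoked.
async def _demo_v2_run_and_approve(query: str) -> dict:
    if _httpx is None:
        return {}
    headers = {"X-API-Key": "my-secret-key"}
    async with _httpx.AsyncClient(base_url="http://localhost:9000", timeout=None) as client:
        run = (await client.post("/v2/agent/run", headers=headers, json={"query": query})).json()
        if run.get("status") != "awaiting_approval":
            return run  # completed (or errored) without an interrupt
        resp = await client.post(
            "/v2/agent/approve",
            headers=headers,
            params={"thread_id": run["thread_id"], "approved": True},
        )
        return resp.json()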

@app.post("/v2/agent/approve")
async def v2_agent_approve(
    thread_id: str,
    approved: bool,
    _http_request: Request,
    _: None = Depends(verify_api_key),
):
    """Resume an interrupted graph (phase 2: approve/reject).

    Parameters
    ----------
    thread_id : str
        The thread_id returned by `/v2/agent/run`.
    approved : bool
        True proceeds to tool_execute; False ends the graph at END.
    """
    if not manager.graph:
        raise HTTPException(status_code=503, detail="LangGraph graph is not initialized.")
    from langgraph.types import Command

    config = {"configurable": {"thread_id": thread_id}}
    try:
        result = await manager.graph.ainvoke(
            Command(resume={"approved": approved}),
            config,
        )
        # "rejected" on rejection, "completed" once approved and finished.
        approval_status = result.get("approval_status", "")
        if not approved:
            response_status = "rejected"
        else:
            response_status = "completed"
        return {
            "status": response_status,
            "thread_id": thread_id,
            "session_id": result.get("session_id", ""),
            "graph_run_id": result.get("request_id", ""),
            "text": result.get("final_text", ""),
            "evidence_items": result.get("evidence_items", []),
            "approval_status": approval_status,
        }
    except Exception as exc:
        logger.error(f"[v2/agent/approve] exception: {exc}")
        # Try to record the graph_run with "error" status.
        session_id = ""
        request_id = ""
        try:
            if manager.session_store:
                graph_state = await manager.graph.aget_state(config)
                state_values = graph_state.values if graph_state else {}
                session_id = state_values.get("session_id", "")
                request_id = state_values.get("request_id", "")
                if session_id:
                    session = manager.session_store.get_or_create(session_id)
                    session.add_graph_run(
                        request_id=request_id,
                        plan_summary=f"[error] {exc}",
                        approval_status="",
                        executed_capabilities=[],
                        status="error",
                        total_latency_ms=0.0,
                    )
        except Exception as persist_exc:
            logger.warning(f"[v2/agent/approve] error persist failed: {persist_exc}")
        logger.exception(f"[v2/agent/approve] approval handling failed: {exc}")
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "thread_id": thread_id,
                "session_id": session_id,
                "graph_run_id": request_id,
                "error": "An internal error occurred while processing the approval.",
            },
        )

@app.post("/v2/agent/cancel")
async def v2_agent_cancel(
    thread_id: str,
    _http_request: Request,
    _: None = Depends(verify_api_key),
):
    """Force-cancel a graph that is waiting on an interrupt.

    Performs the rejection path (Command(resume={"approved": False})) from the
    interrupt state, but passes interrupt_reason="user_cancel" in the state so
    the persist node records the graph_run status as "interrupted".

    Parameters
    ----------
    thread_id : str
        The thread_id returned by `/v2/agent/run`.
    """
    if not manager.graph:
        raise HTTPException(status_code=503, detail="LangGraph graph is not initialized.")
    from langgraph.types import Command

    config = {"configurable": {"thread_id": thread_id}}
    try:
        # Check for an interrupt.
        graph_state = await manager.graph.aget_state(config)
        if not graph_state or not graph_state.next:
            raise HTTPException(
                status_code=409,
                detail="This thread is not currently waiting on an interrupt.",
            )
        session_id = graph_state.values.get("session_id", "")
        # Resume with forced rejection plus the cancel signal.
        result = await manager.graph.ainvoke(
            Command(resume={"approved": False, "cancel": True}),
            config,
        )
        # The state update for the "interrupted" record happens in the persist
        # node (approval_wait_node translates the cancel signal into interrupt_reason).
        return {
            "status": "cancelled",
            "thread_id": thread_id,
            "session_id": session_id,
            "graph_run_id": result.get("request_id", ""),
        }
    except HTTPException:
        raise
    except Exception as exc:
        logger.exception(f"[v2/agent/cancel] cancellation failed: {exc}")
        return JSONResponse(
            status_code=500,
            content={
                "status": "error",
                "thread_id": thread_id,
                "error": "An internal error occurred while processing the cancellation.",
            },
        )

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, **runtime_config.to_uvicorn_kwargs())