| #!/usr/bin/env python3 | |
| """GovOn Legal LoRA ์ด๋ํฐ ์๋น ํตํฉ ๊ฒ์ฆ ์คํฌ๋ฆฝํธ. | |
| HuggingFace Space์ ๋ฐฐํฌ๋ govon-runtime ์๋ฒ์ ๋ํด | |
| legal/civil adapter Multi-LoRA ์๋น ๋์์ ๊ฒ์ฆํ๋ค. | |
| ์ฌ์ฉ๋ฒ: | |
| GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_lora_serving.py | |
| GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_lora_serving.py | |
| ์๋ํฌ์ธํธ ์ฐธ๊ณ (src/inference/api_server.py): | |
| GET /health โ ์๋ฒ ์ํ ํ์ธ (status: "healthy") | |
| POST /v1/completions โ OpenAI-compatible (vLLM ์ง์ ์ ๊ณต) | |
| POST /v1/generate โ GovOn ๋ ๊ฑฐ์ ์์ฑ ์๋ํฌ์ธํธ | |
| POST /v2/agent/run โ LangGraph agent (REST, interrupt๊น์ง ์คํ) | |
| POST /v2/agent/stream โ LangGraph agent (SSE ์คํธ๋ฆฌ๋ฐ) | |
| GET /v1/models โ OpenAI-compatible ๋ชจ๋ธ ๋ชฉ๋ก (vLLM ์ง์ ์ ๊ณต) | |
| AgentRunRequest ํ๋: | |
| query: str โ ์ฌ์ฉ์ ์ ๋ ฅ (ํ์) | |
| session_id: str โ ์ธ์ ์๋ณ์ (์ ํ) | |
| stream: bool โ ์คํธ๋ฆฌ๋ฐ ์ฌ๋ถ (๊ธฐ๋ณธ๊ฐ False) | |
| force_tools: list โ ๊ฐ์ ์คํ ๋๊ตฌ ๋ชฉ๋ก (์ ํ) | |
| max_tokens: int โ ์ต๋ ํ ํฐ ์ (๊ธฐ๋ณธ๊ฐ 512) | |
| temperature: float โ ์จ๋ (๊ธฐ๋ณธ๊ฐ 0.7) | |
| use_rag: bool โ RAG ์ฌ์ฉ ์ฌ๋ถ (๊ธฐ๋ณธ๊ฐ True) | |
| """ | |
| # stdlib | |
| import asyncio | |
| import json | |
| import logging | |
| import os | |
| import re | |
| import sys | |
| import time | |
| from typing import Any, Optional | |
| from uuid import uuid4 | |
# Target runtime base URL; trailing slash stripped so path joins stay clean.
BASE_URL = os.environ.get("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
# Optional API key; when set it is sent as the X-API-Key header.
API_KEY = os.environ.get("API_KEY")
TIMEOUT = 300  # maximum wait per scenario (seconds); also used as the HTTP timeout
# Model id sent to the OpenAI-compatible /v1/completions endpoint.
BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
# Path where the aggregated JSON results are written by main().
RESULTS_PATH = "verify_results.json"
logger = logging.getLogger(__name__)
# Statute/case-law related patterns (for Scenario 4 verification) — regex based;
# a match on any single pattern counts as a legal citation.
LEGAL_PATTERNS = [
    r"์ \s*\d+\s*์กฐ",
    r"์ \s*\d+\s*ํญ",
    r"๋ฒ๋ฅ ",
    r"์ํ๋ น",
    r"์กฐ๋ก",
    r"ํ๋ก",
    r"๋๋ฒ์",
]
# Accumulated per-scenario result entries; appended to by _record().
_results: list[dict] = []
# ---------------------------------------------------------------------------
# HTTP client layer (httpx preferred, urllib fallback)
# ---------------------------------------------------------------------------
try:
    import httpx

    _HTTP_BACKEND = "httpx"

    def _build_headers() -> dict:
        """Return JSON request headers, adding X-API-Key when API_KEY is set."""
        h = {"Content-Type": "application/json", "Accept": "application/json"}
        if API_KEY:
            h["X-API-Key"] = API_KEY
        return h

    async def http_get(path: str) -> tuple[int, dict]:
        """GET BASE_URL+path; return (status_code, parsed JSON body).

        When the body is not valid JSON, a short raw-text preview is returned
        under the "_raw" key instead.
        """
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(url, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except Exception:
                # Non-JSON body: keep a short preview for diagnostics.
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST a JSON body to BASE_URL+path; return (status_code, JSON body)."""
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.post(url, json=body, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except Exception:
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """SSE streaming POST. Collect chunks and return the list of parsed events."""
        url = BASE_URL + path
        h = _build_headers()
        h["Accept"] = "text/event-stream"
        events: list[dict] = []
        status_code = 0
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            async with client.stream("POST", url, json=body, headers=h) as resp:
                status_code = resp.status_code
                async for line in resp.aiter_lines():
                    line = line.strip()
                    # Only "data:" lines carry event payloads in SSE.
                    if not line.startswith("data:"):
                        continue
                    payload = line[len("data:") :].strip()
                    if not payload:
                        continue
                    try:
                        events.append(json.loads(payload))
                    except json.JSONDecodeError:
                        # Keep unparseable payloads for debugging.
                        events.append({"_raw": payload})
        return status_code, events
except ImportError:
    import urllib.error
    import urllib.request

    _HTTP_BACKEND = "urllib"

    def _build_headers() -> dict:
        """Return JSON request headers, adding X-API-Key when API_KEY is set."""
        h = {"Content-Type": "application/json", "Accept": "application/json"}
        if API_KEY:
            h["X-API-Key"] = API_KEY
        return h

    async def http_get(path: str) -> tuple[int, dict]:
        """GET via urllib.

        NOTE(review): this fallback performs blocking I/O inside an async
        function, so it stalls the event loop while the request runs.
        """
        url = BASE_URL + path
        req = urllib.request.Request(url, headers=_build_headers(), method="GET")
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                return r.status, json.loads(r.read().decode())
        except urllib.error.HTTPError as e:
            # Unlike the httpx backend, the error response body is discarded here.
            return e.code, {}

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST a JSON body via urllib (blocking; see http_get note)."""
        url = BASE_URL + path
        data = json.dumps(body).encode()
        req = urllib.request.Request(url, data=data, headers=_build_headers(), method="POST")
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                return r.status, json.loads(r.read().decode())
        except urllib.error.HTTPError as e:
            return e.code, {}

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """urllib fallback: read the SSE stream synchronously."""
        url = BASE_URL + path
        data = json.dumps(body).encode()
        h = _build_headers()
        h["Accept"] = "text/event-stream"
        req = urllib.request.Request(url, data=data, headers=h, method="POST")
        events: list[dict] = []
        status_code = 0
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                status_code = r.status
                # Iterating the response yields raw byte lines.
                for raw_line in r:
                    line = raw_line.decode("utf-8", errors="replace").strip()
                    if not line.startswith("data:"):
                        continue
                    payload = line[len("data:") :].strip()
                    if not payload:
                        continue
                    try:
                        events.append(json.loads(payload))
                    except json.JSONDecodeError:
                        events.append({"_raw": payload})
        except urllib.error.HTTPError as e:
            status_code = e.code
        return status_code, events
# ---------------------------------------------------------------------------
# Result recording / output helpers
# ---------------------------------------------------------------------------
def _record(
    scenario_num: int,
    name: str,
    passed: bool,
    elapsed: float,
    error: Optional[str] = None,
    detail: Optional[Any] = None,
) -> dict:
    """Print a PASS/FAIL line for one scenario and record its result entry.

    The entry is appended to the module-level ``_results`` list and returned.
    """
    status_tag = "[PASS]" if passed else "[FAIL]"
    timing = f"({elapsed:.2f}s)"
    if passed:
        line = f"{status_tag} Scenario {scenario_num}: {name} {timing}"
    else:
        line = f"{status_tag} Scenario {scenario_num}: {name} โ {error} {timing}"
    print(line)
    result_entry = {
        "scenario": scenario_num,
        "name": name,
        "passed": passed,
        "elapsed_s": round(elapsed, 3),
        "error": error,
        "detail": detail,
    }
    _results.append(result_entry)
    return result_entry
| def _extract_text_from_events(events: list[dict]) -> str: | |
| """SSE ์ด๋ฒคํธ ๋ชฉ๋ก์์ ์ต์ข ํ ์คํธ๋ฅผ ์ถ์ถํ๋ค. | |
| v2/agent/stream ์ด๋ฒคํธ ๊ตฌ์กฐ: | |
| - synthesis ๋ ธ๋: {"node": "synthesis", "final_text": "..."} | |
| - v1/agent/stream ์ด๋ฒคํธ: {"text": "...", "finished": true} | |
| """ | |
| # synthesis ๋ ธ๋ final_text ์ฐ์ | |
| for ev in reversed(events): | |
| if ev.get("node") == "synthesis" and ev.get("final_text"): | |
| return ev["final_text"] | |
| # v1 ์คํธ๋ฆฌ๋ฐ ํธํ: finished=true์ธ ๋ง์ง๋ง ์ด๋ฒคํธ์ text | |
| for ev in reversed(events): | |
| if ev.get("finished") and ev.get("text"): | |
| return ev["text"] | |
| # ์ ์ฒด ์ด๋ฒคํธ์์ non-empty text๋ฅผ ์ด์ด๋ถ์ธ๋ค (fallback) | |
| chunks = [ev.get("text", "") or ev.get("final_text", "") for ev in events] | |
| return "".join(c for c in chunks if c) | |
def _contains_legal_keyword(text: str) -> bool:
    """Return True when *text* matches at least one pattern in LEGAL_PATTERNS."""
    for pattern in LEGAL_PATTERNS:
        if re.search(pattern, text) is not None:
            return True
    return False
# ---------------------------------------------------------------------------
# Scenario implementations
# ---------------------------------------------------------------------------
async def scenario1_health_check() -> dict:
    """Scenario 1: verify GET /health answers 200 with an ok/healthy status."""
    started = time.monotonic()
    try:
        code, body = await http_get("/health")
        took = time.monotonic() - started
        if code != 200:
            return _record(1, "Health Check", False, took, f"HTTP {code}", {"body": body})
        # api_server.py reports "status": "healthy" on /health; "ok" is also accepted.
        reported = body.get("status", "")
        if reported in ("ok", "healthy"):
            return _record(1, "Health Check", True, took, detail={"status": reported})
        return _record(
            1,
            "Health Check",
            False,
            took,
            f"status ํ๋๊ฐ ok/healthy๊ฐ ์๋: {reported!r}",
            {"body": body},
        )
    except Exception as exc:
        return _record(1, "Health Check", False, time.monotonic() - started, str(exc))
async def scenario2_base_model_generation() -> dict:
    """Scenario 2: Base Model Generation (OpenAI-compatible /v1/completions).

    vLLM exposes the /v1/completions endpoint directly; it is not implemented
    in GovOn api_server.py, so the vLLM-layer endpoint is tried first. If the
    server does not support /v1/completions, fall back to legacy /v1/generate.
    """
    t0 = time.monotonic()
    body_completions = {
        "model": BASE_MODEL,
        "prompt": "๋ํ๋ฏผ๊ตญ ์๋๋ ์ด๋์ ๋๊น?",
        "max_tokens": 64,
        "temperature": 0.0,
    }
    try:
        status_code, resp = await http_post("/v1/completions", body_completions)
        elapsed = time.monotonic() - t0
        # Validate the vLLM /v1/completions response structure.
        if status_code == 200:
            choices = resp.get("choices", [])
            if choices and choices[0].get("text") is not None:
                text = choices[0]["text"]
                return _record(
                    2,
                    "Base Model Generation",
                    True,
                    elapsed,
                    detail={"endpoint": "/v1/completions", "text_preview": text[:100]},
                )
            return _record(
                2, "Base Model Generation", False, elapsed, "choices[0].text ์์", {"resp": resp}
            )
        # /v1/completions unsupported -> fall back to legacy /v1/generate.
        body_legacy = {
            "prompt": "๋ํ๋ฏผ๊ตญ ์๋๋ ์ด๋์ ๋๊น?",
            "max_tokens": 64,
            "temperature": 0.0,
            "use_rag": False,
        }
        status_code2, resp2 = await http_post("/v1/generate", body_legacy)
        elapsed2 = time.monotonic() - t0
        if status_code2 == 200 and resp2.get("text"):
            return _record(
                2,
                "Base Model Generation",
                True,
                elapsed2,
                detail={"endpoint": "/v1/generate (fallback)", "text_preview": resp2["text"][:100]},
            )
        # Both endpoints failed: report both HTTP statuses and bodies.
        return _record(
            2,
            "Base Model Generation",
            False,
            elapsed2,
            f"/v1/completions HTTP {status_code}, /v1/generate HTTP {status_code2}",
            {"completions_resp": resp, "generate_resp": resp2},
        )
    except Exception as exc:
        return _record(2, "Base Model Generation", False, time.monotonic() - t0, str(exc))
async def _call_agent(
    message: str,
    session_id: str,
    use_stream: bool = True,
) -> tuple[bool, str, Optional[str]]:
    """Call the agent endpoint and return (success, response text, error).

    Tries /v2/agent/stream (SSE) first, then falls back to /v2/agent/run
    (REST). use_rag=False is always sent so the LoRA path is exercised.
    """
    body = {"query": message, "session_id": session_id, "use_rag": False}
    # Attempt /v2/agent/stream (SSE).
    if use_stream:
        try:
            status_code, events = await http_post_sse("/v2/agent/stream", body)
            if status_code == 200 and events:
                text = _extract_text_from_events(events)
                if text:
                    return True, text, None
                # Events arrived but carried no text -> look for an error event.
                for ev in events:
                    if ev.get("status") == "error":
                        return False, "", ev.get("error", "unknown error")
                # __interrupt__ or awaiting_approval event -> auto-approve and
                # collect the final text. LangGraph interrupt() is emitted as
                # an "__interrupt__" node.
                awaiting = next(
                    (
                        ev
                        for ev in events
                        if ev.get("status") == "awaiting_approval"
                        or ev.get("node") == "__interrupt__"
                    ),
                    None,
                )
                if awaiting:
                    thread_id = awaiting.get("thread_id") or session_id
                    try:
                        approve_code, approve_resp = await http_post(
                            f"/v2/agent/approve?thread_id={thread_id}&approved=true", {}
                        )
                        if approve_code == 200:
                            final_text = approve_resp.get("text", "") or approve_resp.get(
                                "final_text", ""
                            )
                            if final_text:
                                return True, final_text, None
                            return False, "", f"approve 200 but text ์์: {approve_resp}"
                        return False, "", f"approve HTTP {approve_code}: {approve_resp}"
                    except Exception as approve_exc:
                        return False, "", f"approve ํธ์ถ ์คํจ: {approve_exc}"
                return False, "", f"SSE ์ด๋ฒคํธ ์์ ํ์ผ๋ text ์์ (events={len(events)})"
        except Exception as exc:
            logger.warning("Stream error: %s", exc)  # fall back to /v2/agent/run
    # Attempt /v2/agent/run (REST).
    try:
        status_code, resp = await http_post("/v2/agent/run", body)
        if status_code == 200:
            text = resp.get("text", "") or resp.get("final_text", "")
            if resp.get("status") == "error":
                return False, text, resp.get("error", "agent run error")
            if text:
                return True, text, None
            # awaiting_approval without completed text is treated as a failure.
            if resp.get("status") == "awaiting_approval":
                return (
                    False,
                    "",
                    f"awaiting_approval: ํ ์คํธ ๋ฏธ์์ฑ (thread_id={resp.get('thread_id')})",
                )
            return False, "", f"text ์์, status={resp.get('status')}"
        return False, "", f"HTTP {status_code}: {resp}"
    except Exception as exc:
        return False, "", str(exc)
# Session ID shared by Scenarios 3/4 (the same session is reused within one run).
_RUN_SESSION_ID = str(uuid4())
async def scenario3_civil_lora() -> dict:
    """Scenario 3: Civil LoRA -> draft_civil_response (via v2/agent/stream)."""
    label = "Civil LoRA (draft_civil_response)"
    started = time.monotonic()
    try:
        ok, text, err = await _call_agent(
            message="์ฃผ์ฐจ ์๋ฐ ๊ณผํ๋ฃ ์ด์์ ์ฒญ ๋ฏผ์์ ๋ํ ๋ต๋ณ ์ด์์ ์์ฑํด์ค",
            session_id=_RUN_SESSION_ID,
        )
        took = time.monotonic() - started
        if not ok:
            preview = text[:200] if text else ""
            return _record(3, label, False, took, err, {"text_preview": preview})
        if not text.strip():
            return _record(3, label, False, took, "์๋ต ํ ์คํธ๊ฐ ๋น์ด์์")
        return _record(3, label, True, took, detail={"text_preview": text[:200]})
    except Exception as exc:
        return _record(3, label, False, time.monotonic() - started, str(exc))
async def scenario4_legal_lora() -> dict:
    """Scenario 4: Legal LoRA -> append_evidence (v2/agent/stream).

    In a fresh session, first request a civil complaint answer draft, then in
    the same session ask for statutory grounds to be appended. The response is
    checked for statute/case-law related patterns.
    """
    t0 = time.monotonic()
    session_id = str(uuid4())
    try:
        # Civil request first within the same session (append_evidence needs
        # the previous answer as context).
        ok_civil, _, err_civil = await _call_agent(
            message="๊ฑด์ถ ํ๊ฐ ์ ์ฒญ ๋ฏผ์์ ๋ํ ๋ต๋ณ ์ด์์ ์์ฑํด์ค",
            session_id=session_id,
        )
        if not ok_civil:
            elapsed = time.monotonic() - t0
            return _record(
                4,
                "Legal LoRA (append_evidence)",
                False,
                elapsed,
                f"civil ์ ํ ์์ฒญ ์คํจ: {err_civil}",
            )
        ok, text, err = await _call_agent(
            message="์ ๋ต๋ณ์ ๊ด๋ จ ๋ฒ๋ น๊ณผ ํ๋ก ๊ทผ๊ฑฐ๋ฅผ ๋ณด๊ฐํด์ค",
            session_id=session_id,
        )
        elapsed = time.monotonic() - t0
        if not ok:
            return _record(
                4,
                "Legal LoRA (append_evidence)",
                False,
                elapsed,
                err,
                {"text_preview": text[:200] if text else ""},
            )
        if not text.strip():
            return _record(
                4, "Legal LoRA (append_evidence)", False, elapsed, "์๋ต ํ ์คํธ๊ฐ ๋น์ด์์"
            )
        # Check which statute/case-law patterns the response matched.
        has_legal = _contains_legal_keyword(text)
        matched = [p for p in LEGAL_PATTERNS if re.search(p, text)]
        detail = {
            "has_legal_keyword": has_legal,
            "matched_patterns": matched,
            "text_preview": text[:300],
        }
        if not has_legal:
            return _record(
                4,
                "Legal LoRA (append_evidence)",
                False,
                elapsed,
                f"๋ฒ๋ น ํจํด ๋ฏธ๋ฐ๊ฒฌ ({LEGAL_PATTERNS[:3]}...)",
                detail,
            )
        return _record(4, "Legal LoRA (append_evidence)", True, elapsed, detail=detail)
    except Exception as exc:
        return _record(4, "Legal LoRA (append_evidence)", False, time.monotonic() - t0, str(exc))
async def scenario5_sequential_multi_lora_switching() -> dict:
    """Scenario 5: alternate civil -> legal agent calls, three iterations.

    Every iteration uses a fresh UUID session; any failed or empty response is
    collected and the scenario fails if any iteration produced an error.
    """
    label = "Sequential Multi-LoRA Switching"
    started = time.monotonic()
    iterations = 3
    failures: list[str] = []
    for i in range(1, iterations + 1):
        sid = str(uuid4())
        # Civil request.
        ok, text, err = await _call_agent(
            message="ํ์ ์ฒ๋ถ ์ด์์ ์ฒญ ๋ฏผ์ ๋ต๋ณ ์ด์์ ์์ฑํด์ค",
            session_id=sid,
        )
        if not ok or not text.strip():
            failures.append(f"iter {i} civil: {err or '๋น ์๋ต'}")
            continue
        # Legal request in the same session.
        ok2, text2, err2 = await _call_agent(
            message="์ ๋ต๋ณ์ ๊ด๋ จ ๋ฒ๋ น ๊ทผ๊ฑฐ๋ฅผ ์ถ๊ฐํด์ค",
            session_id=sid,
        )
        if not ok2 or not text2.strip():
            failures.append(f"iter {i} legal: {err2 or '๋น ์๋ต'}")
    took = time.monotonic() - started
    if failures:
        return _record(
            5,
            label,
            False,
            took,
            "; ".join(failures),
            {"iterations": iterations, "errors": failures},
        )
    return _record(
        5,
        label,
        True,
        took,
        detail={"iterations": iterations, "all_passed": True},
    )
async def scenario6_lora_id_consistency() -> dict:
    """Scenario 6: LoRA ID Consistency Check (informational).

    Checks whether the civil/legal adapters are exposed via /v1/models (vLLM
    OpenAI-compatible). Depending on vLLM version/configuration the adapters
    may not be listed there, so a miss is logged as a WARNING rather than a
    FAIL and does not affect the overall result.
    """
    t0 = time.monotonic()
    try:
        status_code, health = await http_get("/health")
        elapsed = time.monotonic() - t0
        if status_code != 200:
            return _record(
                6, "LoRA ID Consistency Check", False, elapsed, f"/health HTTP {status_code}"
            )
        detail: dict = {"health_status": health.get("status")}
        # Record /health feature_flags / agents_loaded information.
        detail["agents_loaded"] = health.get("agents_loaded", [])
        detail["model"] = health.get("model", "")
        detail["feature_flags"] = health.get("feature_flags", {})
        civil_found = False
        legal_found = False
        # Try /v1/models (vLLM OpenAI-compatible).
        try:
            models_status, models_resp = await http_get("/v1/models")
            if models_status == 200:
                model_ids = [m.get("id", "") for m in models_resp.get("data", [])]
                detail["v1_models"] = model_ids
                civil_found = any("civil" in mid for mid in model_ids)
                legal_found = any("legal" in mid for mid in model_ids)
                detail["civil_adapter_in_models"] = civil_found
                detail["legal_adapter_in_models"] = legal_found
        except Exception as exc:
            logger.warning("Failed to fetch /v1/models: %s", exc)
            detail["v1_models"] = "unavailable"
        # vLLM may not expose adapters in /v1/models, so only record a warning.
        if not civil_found or not legal_found:
            missing = []
            if not civil_found:
                missing.append("civil")
            if not legal_found:
                missing.append("legal")
            detail["warning"] = f"์ด๋ํฐ ๋ฏธ๊ฐ์ง (vLLM ๋ฒ์ ์ ๋ฐ๋ผ ์ ์): {', '.join(missing)}"
            logger.warning(detail["warning"])
        return _record(6, "LoRA ID Consistency Check", True, time.monotonic() - t0, detail=detail)
    except Exception as exc:
        return _record(6, "LoRA ID Consistency Check", False, time.monotonic() - t0, str(exc))
# ---------------------------------------------------------------------------
# Main runner
# ---------------------------------------------------------------------------
async def main() -> int:
    """Run every verification scenario, print a summary, persist JSON results.

    Returns 0 when all scenarios passed, 1 otherwise (used as the exit code).
    """
    for banner_line in (
        "GovOn Legal LoRA ์๋น ํตํฉ ๊ฒ์ฆ",
        f" ๋์ ์๋ฒ: {BASE_URL}",
        f" ์ธ์ฆ: {'API_KEY ์ค์ ๋จ' if API_KEY else '๋ฏธ์ค์ (๋น์ธ์ฆ)'}",
        f" HTTP ๋ฐฑ์๋: {_HTTP_BACKEND}",
        f" ํ์์์: {TIMEOUT}s / ์๋๋ฆฌ์ค",
        "-" * 60,
    ):
        print(banner_line)
    # Scenarios run sequentially; each records its own result via _record().
    for scenario in (
        scenario1_health_check,
        scenario2_base_model_generation,
        scenario3_civil_lora,
        scenario4_legal_lora,
        scenario5_sequential_multi_lora_switching,
        scenario6_lora_id_consistency,
    ):
        await scenario()
    print("-" * 60)
    pass_count = sum(1 for entry in _results if entry["passed"])
    fail_count = len(_results) - pass_count
    print(f"๊ฒฐ๊ณผ: {pass_count}/{len(_results)} ํต๊ณผ, {fail_count} ์คํจ")
    # Persist the aggregated results as JSON.
    summary = {
        "server_url": BASE_URL,
        "http_backend": _HTTP_BACKEND,
        "total": len(_results),
        "passed": pass_count,
        "failed": fail_count,
        "scenarios": _results,
    }
    with open(RESULTS_PATH, "w", encoding="utf-8") as fp:
        json.dump(summary, fp, ensure_ascii=False, indent=2)
    print(f"๊ฒฐ๊ณผ ์ ์ฅ: {RESULTS_PATH}")
    return 0 if fail_count == 0 else 1
if __name__ == "__main__":
    # Run the async verification suite; exit code 0 means all scenarios passed.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)