# govon-runtime / scripts / verify_lora_serving.py
# NOTE(review): the following lines are HuggingFace Hub page residue captured
# during extraction, kept here as comments so the file remains valid Python:
#   author: umyunsang — commit: "sync: scripts/ (verify_e2e_tool_calling.py)" — rev 769e684 (verified)
#!/usr/bin/env python3
"""Integration verification script for GovOn Legal LoRA adapter serving.

Verifies Multi-LoRA serving behavior (legal/civil adapters) against a
govon-runtime server deployed on a HuggingFace Space.

Usage:
    GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_lora_serving.py
    GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_lora_serving.py

Endpoint reference (src/inference/api_server.py):
    GET  /health          - server status check (status: "healthy")
    POST /v1/completions  - OpenAI-compatible (served directly by vLLM)
    POST /v1/generate     - GovOn legacy generation endpoint
    POST /v2/agent/run    - LangGraph agent (REST, runs until interrupt)
    POST /v2/agent/stream - LangGraph agent (SSE streaming)
    GET  /v1/models       - OpenAI-compatible model list (served directly by vLLM)

AgentRunRequest fields:
    query: str         - user input (required)
    session_id: str    - session identifier (optional)
    stream: bool       - whether to stream (default False)
    force_tools: list  - tools to force-run (optional)
    max_tokens: int    - maximum token count (default 512)
    temperature: float - sampling temperature (default 0.7)
    use_rag: bool      - whether to use RAG (default True)
"""
# stdlib
import asyncio
import json
import logging
import os
import re
import sys
import time
from typing import Any, Optional
from uuid import uuid4
# Target server base URL; trailing slash stripped so endpoint paths can be appended.
BASE_URL = os.environ.get("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
# Optional API key, sent as the X-API-Key header when set.
API_KEY = os.environ.get("API_KEY")
TIMEOUT = 300  # maximum wait time per scenario (seconds); also passed as the per-request timeout
BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
RESULTS_PATH = "verify_results.json"  # JSON report written at the end of main()
logger = logging.getLogger(__name__)
# Statute/case-law related patterns (used for Scenario 4 verification) —
# regex based, single characters excluded.
LEGAL_PATTERNS = [
    r"์ œ\s*\d+\s*์กฐ",
    r"์ œ\s*\d+\s*ํ•ญ",
    r"๋ฒ•๋ฅ ",
    r"์‹œํ–‰๋ น",
    r"์กฐ๋ก€",
    r"ํŒ๋ก€",
    r"๋Œ€๋ฒ•์›",
]
# Accumulated scenario result entries; appended to by _record().
_results: list[dict] = []
# ---------------------------------------------------------------------------
# HTTP ํด๋ผ์ด์–ธํŠธ ๋ ˆ์ด์–ด (httpx ์šฐ์„ , urllib fallback)
# ---------------------------------------------------------------------------
def _build_headers() -> dict:
    """Return common JSON request headers, adding X-API-Key when configured.

    Hoisted above the backend branches so both backends share one definition.
    """
    h = {"Content-Type": "application/json", "Accept": "application/json"}
    if API_KEY:
        h["X-API-Key"] = API_KEY
    return h


try:
    import httpx

    _HTTP_BACKEND = "httpx"

    async def http_get(path: str) -> tuple[int, dict]:
        """GET BASE_URL+path; return (status_code, parsed JSON body).

        Non-JSON bodies are returned as {"_raw": <first 200 chars>}.
        """
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(url, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except Exception:
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST a JSON body; return (status_code, parsed JSON or {"_raw": ...})."""
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.post(url, json=body, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except Exception:
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """Streaming SSE POST. Collect chunks and return the parsed event list.

        Only "data:" lines are consumed; payloads that fail JSON parsing are
        kept as {"_raw": payload} so the caller still sees them.
        """
        url = BASE_URL + path
        h = _build_headers()
        h["Accept"] = "text/event-stream"
        events: list[dict] = []
        status_code = 0
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            async with client.stream("POST", url, json=body, headers=h) as resp:
                status_code = resp.status_code
                async for line in resp.aiter_lines():
                    line = line.strip()
                    if not line.startswith("data:"):
                        continue
                    payload = line[len("data:") :].strip()
                    if not payload:
                        continue
                    try:
                        events.append(json.loads(payload))
                    except json.JSONDecodeError:
                        events.append({"_raw": payload})
        return status_code, events

except ImportError:
    import urllib.error
    import urllib.request

    _HTTP_BACKEND = "urllib"

    def _read_http_error(e: urllib.error.HTTPError) -> dict:
        """Best-effort parse of an HTTPError response body as JSON.

        Previously the error body was discarded and {} returned, which hid the
        server's error detail from scenario reports (the httpx backend returns
        the parsed body even for non-2xx responses). Any read/parse failure
        still yields {} so this remains best-effort.
        """
        try:
            return json.loads(e.read().decode())
        except Exception:
            return {}

    async def http_get(path: str) -> tuple[int, dict]:
        """GET BASE_URL+path (blocking urllib behind the async signature)."""
        url = BASE_URL + path
        req = urllib.request.Request(url, headers=_build_headers(), method="GET")
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                return r.status, json.loads(r.read().decode())
        except urllib.error.HTTPError as e:
            return e.code, _read_http_error(e)

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST a JSON body (blocking urllib behind the async signature)."""
        url = BASE_URL + path
        data = json.dumps(body).encode()
        req = urllib.request.Request(url, data=data, headers=_build_headers(), method="POST")
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                return r.status, json.loads(r.read().decode())
        except urllib.error.HTTPError as e:
            return e.code, _read_http_error(e)

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """urllib fallback: read the SSE stream synchronously."""
        url = BASE_URL + path
        data = json.dumps(body).encode()
        h = _build_headers()
        h["Accept"] = "text/event-stream"
        req = urllib.request.Request(url, data=data, headers=h, method="POST")
        events: list[dict] = []
        status_code = 0
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                status_code = r.status
                for raw_line in r:
                    line = raw_line.decode("utf-8", errors="replace").strip()
                    if not line.startswith("data:"):
                        continue
                    payload = line[len("data:") :].strip()
                    if not payload:
                        continue
                    try:
                        events.append(json.loads(payload))
                    except json.JSONDecodeError:
                        events.append({"_raw": payload})
        except urllib.error.HTTPError as e:
            status_code = e.code
        return status_code, events
# ---------------------------------------------------------------------------
# ๊ฒฐ๊ณผ ๊ธฐ๋ก / ์ถœ๋ ฅ ํ—ฌํผ
# ---------------------------------------------------------------------------
def _record(
    scenario_num: int,
    name: str,
    passed: bool,
    elapsed: float,
    error: Optional[str] = None,
    detail: Optional[Any] = None,
) -> dict:
    """Log one scenario outcome to stdout and the shared _results list.

    Returns the recorded entry dict so callers can `return _record(...)`.
    """
    entry = {
        "scenario": scenario_num,
        "name": name,
        "passed": passed,
        "elapsed_s": round(elapsed, 3),
        "error": error,
        "detail": detail,
    }
    tag = "[PASS]" if passed else "[FAIL]"
    suffix = f"({elapsed:.2f}s)"
    # Failures additionally carry the error text between name and timing.
    middle = "" if passed else f"โ€” {error} "
    print(f"{tag} Scenario {scenario_num}: {name} {middle}{suffix}")
    _results.append(entry)
    return entry
def _extract_text_from_events(events: list[dict]) -> str:
"""SSE ์ด๋ฒคํŠธ ๋ชฉ๋ก์—์„œ ์ตœ์ข… ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•œ๋‹ค.
v2/agent/stream ์ด๋ฒคํŠธ ๊ตฌ์กฐ:
- synthesis ๋…ธ๋“œ: {"node": "synthesis", "final_text": "..."}
- v1/agent/stream ์ด๋ฒคํŠธ: {"text": "...", "finished": true}
"""
# synthesis ๋…ธ๋“œ final_text ์šฐ์„ 
for ev in reversed(events):
if ev.get("node") == "synthesis" and ev.get("final_text"):
return ev["final_text"]
# v1 ์ŠคํŠธ๋ฆฌ๋ฐ ํ˜ธํ™˜: finished=true์ธ ๋งˆ์ง€๋ง‰ ์ด๋ฒคํŠธ์˜ text
for ev in reversed(events):
if ev.get("finished") and ev.get("text"):
return ev["text"]
# ์ „์ฒด ์ด๋ฒคํŠธ์—์„œ non-empty text๋ฅผ ์ด์–ด๋ถ™์ธ๋‹ค (fallback)
chunks = [ev.get("text", "") or ev.get("final_text", "") for ev in events]
return "".join(c for c in chunks if c)
def _contains_legal_keyword(text: str) -> bool:
    """Return True when *text* matches at least one pattern in LEGAL_PATTERNS."""
    for pattern in LEGAL_PATTERNS:
        if re.search(pattern, text):
            return True
    return False
# ---------------------------------------------------------------------------
# ์‹œ๋‚˜๋ฆฌ์˜ค ๊ตฌํ˜„
# ---------------------------------------------------------------------------
async def scenario1_health_check() -> dict:
    """Scenario 1: verify GET /health answers 200 with status ok/healthy."""
    started = time.monotonic()
    try:
        code, body = await http_get("/health")
        elapsed = time.monotonic() - started
        if code != 200:
            return _record(1, "Health Check", False, elapsed, f"HTTP {code}", {"body": body})
        # api_server.py: /health reports "status": "healthy"
        srv_status = body.get("status", "")
        if srv_status in ("ok", "healthy"):
            return _record(1, "Health Check", True, elapsed, detail={"status": srv_status})
        return _record(
            1,
            "Health Check",
            False,
            elapsed,
            f"status ํ•„๋“œ๊ฐ€ ok/healthy๊ฐ€ ์•„๋‹˜: {srv_status!r}",
            {"body": body},
        )
    except Exception as exc:
        return _record(1, "Health Check", False, time.monotonic() - started, str(exc))
async def scenario2_base_model_generation() -> dict:
    """Scenario 2: base model generation via the OpenAI-compatible API.

    POST /v1/completions is served directly by vLLM (not implemented in the
    GovOn api_server.py layer). When the server does not support it, fall
    back to the legacy POST /v1/generate endpoint.
    """
    started = time.monotonic()
    completions_payload = {
        "model": BASE_MODEL,
        "prompt": "๋Œ€ํ•œ๋ฏผ๊ตญ ์ˆ˜๋„๋Š” ์–ด๋””์ž…๋‹ˆ๊นŒ?",
        "max_tokens": 64,
        "temperature": 0.0,
    }
    try:
        code, resp = await http_post("/v1/completions", completions_payload)
        elapsed = time.monotonic() - started
        if code == 200:
            # Validate the vLLM /v1/completions response shape.
            choices = resp.get("choices", [])
            if choices and choices[0].get("text") is not None:
                return _record(
                    2,
                    "Base Model Generation",
                    True,
                    elapsed,
                    detail={
                        "endpoint": "/v1/completions",
                        "text_preview": choices[0]["text"][:100],
                    },
                )
            return _record(
                2, "Base Model Generation", False, elapsed, "choices[0].text ์—†์Œ", {"resp": resp}
            )
        # /v1/completions unsupported -> retry through the legacy endpoint.
        legacy_payload = {
            "prompt": "๋Œ€ํ•œ๋ฏผ๊ตญ ์ˆ˜๋„๋Š” ์–ด๋””์ž…๋‹ˆ๊นŒ?",
            "max_tokens": 64,
            "temperature": 0.0,
            "use_rag": False,
        }
        code2, resp2 = await http_post("/v1/generate", legacy_payload)
        elapsed = time.monotonic() - started
        if code2 == 200 and resp2.get("text"):
            return _record(
                2,
                "Base Model Generation",
                True,
                elapsed,
                detail={
                    "endpoint": "/v1/generate (fallback)",
                    "text_preview": resp2["text"][:100],
                },
            )
        return _record(
            2,
            "Base Model Generation",
            False,
            elapsed,
            f"/v1/completions HTTP {code}, /v1/generate HTTP {code2}",
            {"completions_resp": resp, "generate_resp": resp2},
        )
    except Exception as exc:
        return _record(2, "Base Model Generation", False, time.monotonic() - started, str(exc))
async def _call_agent(
    message: str,
    session_id: str,
    use_stream: bool = True,
) -> tuple[bool, str, Optional[str]]:
    """Call the agent endpoints and return (success, response_text, error).

    Tries v2/agent/stream (SSE) first, then falls back to v2/agent/run (REST).
    Always passes use_rag=False to force the LoRA path.

    Args:
        message: user query, forwarded as the "query" field.
        session_id: session identifier shared by related requests.
        use_stream: when True, attempt the SSE endpoint before the REST one.
    """
    body = {"query": message, "session_id": session_id, "use_rag": False}
    # Attempt v2/agent/stream (SSE) first.
    if use_stream:
        try:
            status_code, events = await http_post_sse("/v2/agent/stream", body)
            if status_code == 200 and events:
                text = _extract_text_from_events(events)
                if text:
                    return True, text, None
                # Events were received but carried no text — look for an error event.
                for ev in events:
                    if ev.get("status") == "error":
                        return False, "", ev.get("error", "unknown error")
                # __interrupt__ or awaiting_approval event -> auto-approve, then
                # collect the final text. LangGraph interrupt() is emitted as an
                # "__interrupt__" node.
                awaiting = next(
                    (
                        ev
                        for ev in events
                        if ev.get("status") == "awaiting_approval"
                        or ev.get("node") == "__interrupt__"
                    ),
                    None,
                )
                if awaiting:
                    thread_id = awaiting.get("thread_id") or session_id
                    try:
                        approve_code, approve_resp = await http_post(
                            f"/v2/agent/approve?thread_id={thread_id}&approved=true", {}
                        )
                        if approve_code == 200:
                            final_text = approve_resp.get("text", "") or approve_resp.get(
                                "final_text", ""
                            )
                            if final_text:
                                return True, final_text, None
                            return False, "", f"approve 200 but text ์—†์Œ: {approve_resp}"
                        return False, "", f"approve HTTP {approve_code}: {approve_resp}"
                    except Exception as approve_exc:
                        return False, "", f"approve ํ˜ธ์ถœ ์‹คํŒจ: {approve_exc}"
                return False, "", f"SSE ์ด๋ฒคํŠธ ์ˆ˜์‹ ํ–ˆ์œผ๋‚˜ text ์—†์Œ (events={len(events)})"
        except Exception as exc:
            logger.warning("Stream error: %s", exc)  # fallback to /v2/agent/run
    # Attempt v2/agent/run (REST).
    try:
        status_code, resp = await http_post("/v2/agent/run", body)
        if status_code == 200:
            text = resp.get("text", "") or resp.get("final_text", "")
            if resp.get("status") == "error":
                return False, text, resp.get("error", "agent run error")
            if text:
                return True, text, None
            # awaiting_approval without text — nothing was generated, treat as failure.
            if resp.get("status") == "awaiting_approval":
                return (
                    False,
                    "",
                    f"awaiting_approval: ํ…์ŠคํŠธ ๋ฏธ์ƒ์„ฑ (thread_id={resp.get('thread_id')})",
                )
            return False, "", f"text ์—†์Œ, status={resp.get('status')}"
        return False, "", f"HTTP {status_code}: {resp}"
    except Exception as exc:
        return False, "", str(exc)
# Session ID created once per run. NOTE(review): the original comment said it
# is shared by Scenarios 3/4, but only Scenario 3 uses it — Scenario 4 creates
# its own session below.
_RUN_SESSION_ID = str(uuid4())


async def scenario3_civil_lora() -> dict:
    """Scenario 3: Civil LoRA — draft_civil_response via /v2/agent/stream."""
    started = time.monotonic()
    label = "Civil LoRA (draft_civil_response)"
    try:
        ok, text, err = await _call_agent(
            message="์ฃผ์ฐจ ์œ„๋ฐ˜ ๊ณผํƒœ๋ฃŒ ์ด์˜์‹ ์ฒญ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=_RUN_SESSION_ID,
        )
        elapsed = time.monotonic() - started
        if not ok:
            return _record(
                3, label, False, elapsed, err, {"text_preview": text[:200] if text else ""}
            )
        if not text.strip():
            return _record(3, label, False, elapsed, "์‘๋‹ต ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Œ")
        return _record(3, label, True, elapsed, detail={"text_preview": text[:200]})
    except Exception as exc:
        return _record(3, label, False, time.monotonic() - started, str(exc))
async def scenario4_legal_lora() -> dict:
    """Scenario 4: Legal LoRA — append_evidence via /v2/agent/stream.

    In an independent session, first requests a civil complaint draft
    (append_evidence needs prior-answer context), then asks the same session
    to ground the answer in statutes/case law and checks the reply against
    LEGAL_PATTERNS.
    """
    started = time.monotonic()
    session_id = str(uuid4())
    label = "Legal LoRA (append_evidence)"
    try:
        # Civil request first, in the same session.
        ok_civil, _, err_civil = await _call_agent(
            message="๊ฑด์ถ• ํ—ˆ๊ฐ€ ์‹ ์ฒญ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=session_id,
        )
        if not ok_civil:
            return _record(
                4,
                label,
                False,
                time.monotonic() - started,
                f"civil ์„ ํ–‰ ์š”์ฒญ ์‹คํŒจ: {err_civil}",
            )
        ok, text, err = await _call_agent(
            message="์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น๊ณผ ํŒ๋ก€ ๊ทผ๊ฑฐ๋ฅผ ๋ณด๊ฐ•ํ•ด์ค˜",
            session_id=session_id,
        )
        elapsed = time.monotonic() - started
        if not ok:
            return _record(
                4, label, False, elapsed, err, {"text_preview": text[:200] if text else ""}
            )
        if not text.strip():
            return _record(4, label, False, elapsed, "์‘๋‹ต ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Œ")
        # Collect every matching pattern; a non-empty list means success.
        matched = [p for p in LEGAL_PATTERNS if re.search(p, text)]
        detail = {
            "has_legal_keyword": bool(matched),
            "matched_patterns": matched,
            "text_preview": text[:300],
        }
        if not matched:
            return _record(
                4,
                label,
                False,
                elapsed,
                f"๋ฒ•๋ น ํŒจํ„ด ๋ฏธ๋ฐœ๊ฒฌ ({LEGAL_PATTERNS[:3]}...)",
                detail,
            )
        return _record(4, label, True, elapsed, detail=detail)
    except Exception as exc:
        return _record(4, label, False, time.monotonic() - started, str(exc))
async def scenario5_sequential_multi_lora_switching() -> dict:
    """Scenario 5: alternate civil -> legal requests 3x to probe LoRA switching.

    Each iteration uses its own UUID session: a civil draft request followed
    by a legal-grounding request in the same session. Failures are collected
    and reported together at the end.
    """
    started = time.monotonic()
    iterations = 3
    failures: list[str] = []
    for i in range(1, iterations + 1):
        session_id = str(uuid4())
        # Civil request.
        ok, text, err = await _call_agent(
            message="ํ–‰์ •์ฒ˜๋ถ„ ์ด์˜์‹ ์ฒญ ๋ฏผ์› ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=session_id,
        )
        if not ok or not text.strip():
            failures.append(f"iter {i} civil: {err or '๋นˆ ์‘๋‹ต'}")
            continue
        # Legal request (same session).
        ok2, text2, err2 = await _call_agent(
            message="์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น ๊ทผ๊ฑฐ๋ฅผ ์ถ”๊ฐ€ํ•ด์ค˜",
            session_id=session_id,
        )
        if not ok2 or not text2.strip():
            failures.append(f"iter {i} legal: {err2 or '๋นˆ ์‘๋‹ต'}")
    elapsed = time.monotonic() - started
    if failures:
        return _record(
            5,
            "Sequential Multi-LoRA Switching",
            False,
            elapsed,
            "; ".join(failures),
            {"iterations": iterations, "errors": failures},
        )
    return _record(
        5,
        "Sequential Multi-LoRA Switching",
        True,
        elapsed,
        detail={"iterations": iterations, "all_passed": True},
    )
async def scenario6_lora_id_consistency() -> dict:
    """Scenario 6: LoRA ID consistency check (informational).

    Reads /health metadata, then checks whether the civil/legal adapters are
    exposed in /v1/models (vLLM OpenAI-compatible). vLLM may not list LoRA
    adapters there depending on version/config, so missing adapters are
    recorded as a WARNING and do not fail the overall run; only an
    unreachable /health fails this scenario.
    """
    started = time.monotonic()
    try:
        code, health = await http_get("/health")
        if code != 200:
            return _record(
                6,
                "LoRA ID Consistency Check",
                False,
                time.monotonic() - started,
                f"/health HTTP {code}",
            )
        # Record /health feature_flags / agents_loaded information.
        detail: dict = {
            "health_status": health.get("status"),
            "agents_loaded": health.get("agents_loaded", []),
            "model": health.get("model", ""),
            "feature_flags": health.get("feature_flags", {}),
        }
        civil_found = legal_found = False
        # Try /v1/models (vLLM OpenAI-compatible).
        try:
            models_code, models_resp = await http_get("/v1/models")
            if models_code == 200:
                model_ids = [m.get("id", "") for m in models_resp.get("data", [])]
                civil_found = any("civil" in mid for mid in model_ids)
                legal_found = any("legal" in mid for mid in model_ids)
                detail["v1_models"] = model_ids
                detail["civil_adapter_in_models"] = civil_found
                detail["legal_adapter_in_models"] = legal_found
        except Exception as exc:
            logger.warning("Failed to fetch /v1/models: %s", exc)
            detail["v1_models"] = "unavailable"
        # Informational only — adapters may legitimately be absent from /v1/models.
        missing = [
            adapter
            for adapter, found in (("civil", civil_found), ("legal", legal_found))
            if not found
        ]
        if missing:
            detail["warning"] = f"์–ด๋Œ‘ํ„ฐ ๋ฏธ๊ฐ์ง€ (vLLM ๋ฒ„์ „์— ๋”ฐ๋ผ ์ •์ƒ): {', '.join(missing)}"
            logger.warning(detail["warning"])
        return _record(6, "LoRA ID Consistency Check", True, time.monotonic() - started, detail=detail)
    except Exception as exc:
        return _record(6, "LoRA ID Consistency Check", False, time.monotonic() - started, str(exc))
# ---------------------------------------------------------------------------
# ๋ฉ”์ธ ๋Ÿฌ๋„ˆ
# ---------------------------------------------------------------------------
async def main() -> int:
    """Run every verification scenario, print a summary, persist JSON results.

    Returns 0 when all scenarios passed, 1 otherwise (used as exit code).
    """
    print("GovOn Legal LoRA ์„œ๋น™ ํ†ตํ•ฉ ๊ฒ€์ฆ")
    print(f" ๋Œ€์ƒ ์„œ๋ฒ„: {BASE_URL}")
    print(f" ์ธ์ฆ: {'API_KEY ์„ค์ •๋จ' if API_KEY else '๋ฏธ์„ค์ • (๋น„์ธ์ฆ)'}")
    print(f" HTTP ๋ฐฑ์—”๋“œ: {_HTTP_BACKEND}")
    print(f" ํƒ€์ž„์•„์›ƒ: {TIMEOUT}s / ์‹œ๋‚˜๋ฆฌ์˜ค")
    print("-" * 60)
    # Scenarios run strictly in order; each records itself into _results.
    for scenario in (
        scenario1_health_check,
        scenario2_base_model_generation,
        scenario3_civil_lora,
        scenario4_legal_lora,
        scenario5_sequential_multi_lora_switching,
        scenario6_lora_id_consistency,
    ):
        await scenario()
    print("-" * 60)
    passed = len([r for r in _results if r["passed"]])
    failed = len(_results) - passed
    print(f"๊ฒฐ๊ณผ: {passed}/{len(_results)} ํ†ต๊ณผ, {failed} ์‹คํŒจ")
    # Persist the JSON report.
    summary = {
        "server_url": BASE_URL,
        "http_backend": _HTTP_BACKEND,
        "total": len(_results),
        "passed": passed,
        "failed": failed,
        "scenarios": _results,
    }
    with open(RESULTS_PATH, "w", encoding="utf-8") as f:
        json.dump(summary, f, ensure_ascii=False, indent=2)
    print(f"๊ฒฐ๊ณผ ์ €์žฅ: {RESULTS_PATH}")
    return 0 if failed == 0 else 1


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))