Spaces:

umyunsang
/

govon-runtime

Paused

App Files Files Community

umyunsang commited on 11 days ago

Commit

769e684

verified ·

1 Parent(s): 7ec9957

sync: scripts/ (verify_e2e_tool_calling.py)

Browse files

Files changed (14) hide show

scripts/.DS_Store +0 -0
scripts/__pycache__/verify_e2e_tool_calling.cpython-313.pyc +0 -0
scripts/deploy-hfspace.sh +81 -0
scripts/deploy.sh +279 -0
scripts/final_api_check.py +57 -0
scripts/govon-bootstrap.sh +287 -0
scripts/offline-deploy.sh +128 -0
scripts/smoke-test.sh +46 -0
scripts/test_alio_only.py +42 -0
scripts/test_api_keys.py +71 -0
scripts/test_law_https.py +25 -0
scripts/verify_e2e_tool_calling.py +1645 -0
scripts/verify_lora_serving.py +663 -0
scripts/verify_results.json +83 -0

scripts/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

scripts/__pycache__/verify_e2e_tool_calling.cpython-313.pyc ADDED Viewed

Binary file (61.3 kB). View file

scripts/deploy-hfspace.sh ADDED Viewed

	@@ -0,0 +1,81 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# Deploy the GovOn runtime to a HuggingFace Space.
+# Usage: ./scripts/deploy-hfspace.sh
+#
+# Environment:
+#   HF_TOKEN       required; used for every Hub call and also stored as a Space secret
+#   SPACE_REPO     target Space repo id (default: umyunsang/govon-runtime)
+#   SPACE_PRIVATE  true/1/yes creates the Space as private (default: false)
+#   ADAPTER_PATHS  value stored as the ADAPTER_PATHS Space secret (default set in step 3)
+#
+# Dockerfile.hfspace, requirements.txt, src/ and agents/ are opened by relative
+# path, i.e. relative to the directory this script is invoked from.
+export SPACE_REPO="${SPACE_REPO:-umyunsang/govon-runtime}"
+export HF_TOKEN="${HF_TOKEN:?HF_TOKEN 환경변수가 필요합니다}"
+echo "=== GovOn HF Spaces 배포 ==="
+echo "Space: $SPACE_REPO"
+# 1. Create the Space (exist_ok=True makes this a no-op when it already exists)
+python3 -c "
+import os
+from huggingface_hub import create_repo
+is_private = os.environ.get('SPACE_PRIVATE', 'false').lower() in ('true', '1', 'yes')
+create_repo(os.environ['SPACE_REPO'], repo_type='space', space_sdk='docker', exist_ok=True, token=os.environ['HF_TOKEN'], private=is_private)
+print('Space repo ready')
+"
+# 2. Upload the files the Space needs
+python3 -c "
+import os
+from huggingface_hub import HfApi
+api = HfApi(token=os.environ['HF_TOKEN'])
+space_repo = os.environ['SPACE_REPO']
+# Dockerfile
+api.upload_file(path_or_fileobj='Dockerfile.hfspace', path_in_repo='Dockerfile',
+    repo_id=space_repo, repo_type='space')
+# requirements.txt
+api.upload_file(path_or_fileobj='requirements.txt', path_in_repo='requirements.txt',
+    repo_id=space_repo, repo_type='space')
+# src/ 디렉터리
+api.upload_folder(folder_path='src', path_in_repo='src',
+    repo_id=space_repo, repo_type='space',
+    ignore_patterns=['__pycache__', '*.pyc', '.pytest_cache'])
+# agents/ 디렉터리 (존재하면)
+if os.path.isdir('agents'):
+    api.upload_folder(folder_path='agents', path_in_repo='agents',
+        repo_id=space_repo, repo_type='space')
+print('Files uploaded')
+"
+# 3. Configure Space secrets
+python3 -c "
+import os
+from huggingface_hub import HfApi
+api = HfApi(token=os.environ['HF_TOKEN'])
+api.add_space_secret(os.environ['SPACE_REPO'], 'HF_TOKEN', os.environ['HF_TOKEN'])
+# ADAPTER_PATHS: HF Hub repo ID 방식 (vLLM이 자동 다운로드)
+adapter_paths = os.environ.get('ADAPTER_PATHS', 'civil=umyunsang/govon-civil-adapter,legal=siwo/govon-legal-adapter')
+api.add_space_secret(os.environ['SPACE_REPO'], 'ADAPTER_PATHS', adapter_paths)
+print('Secrets configured')
+"
+# 4. Request hardware (L4 24GB — base 20GB + adapters ~1GB).
+# Best effort: a permission/quota failure only prints a warning and the script
+# continues. The Python step exits non-zero on failure so the final summary can
+# report what actually happened instead of always claiming success.
+hardware_ok=true
+python3 -c "
+import os
+import sys
+from huggingface_hub import HfApi
+api = HfApi(token=os.environ['HF_TOKEN'])
+try:
+    api.request_space_hardware(os.environ['SPACE_REPO'], 'l4x1')
+    print('Hardware set to l4x1 (24GB VRAM)')
+except Exception as e:
+    print(f'WARNING: 하드웨어 설정 실패 (수동으로 설정 필요): {e}')
+    sys.exit(1)
+" || hardware_ok=false
+echo ""
+echo "=== 배포 완료 ==="
+echo "Space URL: https://huggingface.co/spaces/$SPACE_REPO"
+if [ "$hardware_ok" = true ]; then
+  echo "하드웨어: L4 24GB (자동 설정됨)"
+else
+  echo "하드웨어: 자동 설정 실패 — Space 설정에서 L4 24GB를 수동으로 지정하세요"
+fi
+echo ""
+echo "GPU 검증 실행:"
+echo "  GOVON_RUNTIME_URL=https://<space-url> python3 scripts/verify_lora_serving.py"

scripts/deploy.sh ADDED Viewed

	@@ -0,0 +1,279 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# ──────────────────────────────────────────────
+# GovOn Blue/Green Deployment Script
+#
+# Usage:
+#   ./scripts/deploy.sh deploy <image-tag>     Deploy new version
+#   ./scripts/deploy.sh rollback               Rollback to previous version
+#   ./scripts/deploy.sh status                 Show current deployment status
+#   ./scripts/deploy.sh health                 Check health of active deployment
+# ──────────────────────────────────────────────
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+COMPOSE_FILE="${PROJECT_DIR}/docker-compose.prod.yml"
+STATE_FILE="${PROJECT_DIR}/.deploy-state"
+HEALTH_TIMEOUT=120
+HEALTH_INTERVAL=5
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+log_info()  { echo -e "${GREEN}[INFO]${NC} $1"; }
+log_warn()  { echo -e "${YELLOW}[WARN]${NC} $1"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
+# ──────────────────────────────────────────────
+# State management
+# ──────────────────────────────────────────────
+# Print the active deployment slot: "blue", "green", or "none".
+#
+# The slot name is read from the first line of $STATE_FILE. A missing,
+# unreadable, empty, or unrecognized state file yields "none", so callers never
+# receive an empty or arbitrary string to pass on as a compose profile.
+get_active_slot() {
+  local slot="none"
+  if [ -f "$STATE_FILE" ]; then
+    # read returns non-zero on an empty file or a missing trailing newline;
+    # neither is fatal here, the value is validated below.
+    IFS= read -r slot < "$STATE_FILE" || true
+    # Drop stray whitespace / CR (e.g. a hand-edited or CRLF state file).
+    slot="${slot//[[:space:]]/}"
+  fi
+  case "$slot" in
+    blue|green) echo "$slot" ;;
+    *) echo "none" ;;
+  esac
+}
+get_inactive_slot() {
+  local active
+  active=$(get_active_slot)
+  if [ "$active" = "blue" ]; then
+    echo "green"
+  else
+    echo "blue"
+  fi
+}
+get_slot_port() {
+  local slot=$1
+  if [ "$slot" = "blue" ]; then echo 8001; else echo 8002; fi
+}
+# ──────────────────────────────────────────────
+# Health check with retry
+# ──────────────────────────────────────────────
+# Poll http://localhost:<port>/health until it answers successfully or
+# HEALTH_TIMEOUT seconds of wall-clock time have passed.
+#   $1 - port to probe
+# Returns 0 once a probe succeeds, 1 on timeout.
+wait_for_health() {
+  local port=$1
+  # SECONDS is bash's built-in wall clock; using it means time spent inside
+  # curl counts against the budget, not only the sleeps.
+  local start=$SECONDS
+  local elapsed=0
+  log_info "헬스체크 대기 중 (포트: ${port}, 타임아웃: ${HEALTH_TIMEOUT}초)..."
+  while [ "$elapsed" -lt "$HEALTH_TIMEOUT" ]; do
+    # --max-time keeps one stuck connection (accepted but never answered) from
+    # blocking the loop indefinitely.
+    if curl -sf --max-time "$HEALTH_INTERVAL" "http://localhost:${port}/health" > /dev/null 2>&1; then
+      echo ""
+      log_info "헬스체크 통과 (${elapsed}초 소요)"
+      return 0
+    fi
+    sleep "$HEALTH_INTERVAL"
+    elapsed=$((SECONDS - start))
+    printf "."
+  done
+  echo ""
+  log_error "헬스체크 실패 (${HEALTH_TIMEOUT}초 타임아웃)"
+  return 1
+}
+# ──────────────────────────────────────────────
+# Prerequisites check
+# ──────────────────────────────────────────────
+check_prerequisites() {
+  if ! command -v docker &>/dev/null; then
+    log_error "Docker가 설치되어 있지 않습니다."
+    exit 1
+  fi
+  if ! docker compose version &>/dev/null; then
+    log_error "Docker Compose가 설치되어 있지 않습니다."
+    exit 1
+  fi
+  if [ ! -f "$COMPOSE_FILE" ]; then
+    log_error "Compose 파일을 찾을 수 없습니다: ${COMPOSE_FILE}"
+    exit 1
+  fi
+}
+# ──────────────────────────────────────────────
+# Deploy new version
+# ──────────────────────────────────────────────
+cmd_deploy() {
+  local image_tag="${1:-latest}"
+  local active
+  local target
+  local target_port
+  active=$(get_active_slot)
+  target=$(get_inactive_slot)
+  target_port=$(get_slot_port "$target")
+  check_prerequisites
+  log_info "=== GovOn 배포 시작: v${image_tag} ==="
+  log_info "현재 활성 슬롯: ${active}"
+  log_info "배포 대상 슬롯: ${target}"
+  echo ""
+  # Set the tag for the target slot
+  if [ "$target" = "blue" ]; then
+    export BLUE_TAG="$image_tag"
+  else
+    export GREEN_TAG="$image_tag"
+  fi
+  # Pull new image
+  log_info "이미지 풀링: ghcr.io/govon-org/govon:${image_tag}..."
+  docker pull "ghcr.io/govon-org/govon:${image_tag}"
+  # Create volume directories
+  mkdir -p "${PROJECT_DIR}/models" "${PROJECT_DIR}/data" "${PROJECT_DIR}/agents" "${PROJECT_DIR}/configs"
+  # Start target slot
+  log_info "${target} 슬롯 시작 중..."
+  docker compose -f "$COMPOSE_FILE" --profile "$target" up -d
+  # Wait for health
+  if wait_for_health "$target_port"; then
+    log_info "${target} 배포가 정상 작동합니다!"
+    # Update state
+    echo "$target" > "$STATE_FILE"
+    log_info "활성 슬롯 변경: ${active} -> ${target}"
+    # Stop previous slot
+    if [ "$active" != "none" ]; then
+      log_info "이전 ${active} 슬롯 중지 중..."
+      docker compose -f "$COMPOSE_FILE" --profile "$active" down
+    fi
+    echo ""
+    log_info "=== 배포 완료 ==="
+    cmd_status
+  else
+    log_error "배포 실패! 롤백 수행 중..."
+    docker compose -f "$COMPOSE_FILE" --profile "$target" down
+    log_error "실패한 배포를 정리했습니다. 이전 버전이 계속 활성 상태입니다."
+    exit 1
+  fi
+}
+# ──────────────────────────────────────────────
+# Rollback to previous version
+# ──────────────────────────────────────────────
+cmd_rollback() {
+  local active
+  local previous
+  local prev_port
+  active=$(get_active_slot)
+  previous=$(get_inactive_slot)
+  prev_port=$(get_slot_port "$previous")
+  check_prerequisites
+  if [ "$active" = "none" ]; then
+    log_error "롤백할 활성 배포가 없습니다."
+    exit 1
+  fi
+  log_warn "=== 롤백 시작: ${active} -> ${previous} ==="
+  # Start previous slot
+  docker compose -f "$COMPOSE_FILE" --profile "$previous" up -d
+  if wait_for_health "$prev_port"; then
+    # Stop current active
+    docker compose -f "$COMPOSE_FILE" --profile "$active" down
+    echo "$previous" > "$STATE_FILE"
+    echo ""
+    log_info "=== 롤백 완료. 활성 슬롯: ${previous} ==="
+    cmd_status
+  else
+    log_error "롤백 실패! 수동 조치가 필요합니다."
+    log_error "현재 활성 슬롯(${active})은 그대로 유지됩니다."
+    docker compose -f "$COMPOSE_FILE" --profile "$previous" down
+    exit 1
+  fi
+}
+# ──────────────────────────────────────────────
+# Show deployment status
+# ──────────────────────────────────────────────
+cmd_status() {
+  local active
+  active=$(get_active_slot)
+  local blue_status
+  local green_status
+  blue_status=$(docker ps --filter name=govon-blue --format '{{.Status}}' 2>/dev/null || echo "stopped")
+  green_status=$(docker ps --filter name=govon-green --format '{{.Status}}' 2>/dev/null || echo "stopped")
+  [ -z "$blue_status" ] && blue_status="stopped"
+  [ -z "$green_status" ] && green_status="stopped"
+  echo ""
+  echo "========================================"
+  echo "       GovOn 배포 상태"
+  echo "========================================"
+  echo " 활성 슬롯  : ${active}"
+  echo " Blue  (8001): ${blue_status}"
+  echo " Green (8002): ${green_status}"
+  echo "========================================"
+}
+# ──────────────────────────────────────────────
+# Health check
+# ──────────────────────────────────────────────
+cmd_health() {
+  local active
+  local port
+  active=$(get_active_slot)
+  if [ "$active" = "none" ]; then
+    log_error "활성 배포가 없습니다."
+    exit 1
+  fi
+  port=$(get_slot_port "$active")
+  if curl -sf "http://localhost:${port}/health" > /dev/null 2>&1; then
+    log_info "활성 배포(${active})가 정상입니다."
+  else
+    log_error "활성 배포(${active})가 비정상입니다!"
+    exit 1
+  fi
+}
+# ──────────────────────────────────────────────
+# Main
+# ──────────────────────────────────────────────
+case "${1:-help}" in
+  deploy)   cmd_deploy "${2:-latest}" ;;
+  rollback) cmd_rollback ;;
+  status)   cmd_status ;;
+  health)   cmd_health ;;
+  *)
+    echo "GovOn Blue/Green 배포 스크립트"
+    echo ""
+    echo "사용법: $0 {deploy <tag>|rollback|status|health}"
+    echo ""
+    echo "명령어:"
+    echo "  deploy <tag>   새 버전 배포 (기본값: latest)"
+    echo "  rollback       이전 버전으로 롤백"
+    echo "  status         현재 배포 상태 확인"
+    echo "  health         활성 배포 헬스체크"
+    exit 1
+    ;;
+esac

scripts/final_api_check.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import os
+import requests
+def test_law():
+    oc = os.getenv("LAW_GO_KR_OC")
+    url = f"http://www.law.go.kr/DRF/lawSearch.do?target=law&query=민원&type=XML&OC={oc}"
+    try:
+        res = requests.get(url, timeout=10)
+        print(f"[LAW] Status: {res.status_code}")
+        if "사용자 정보 검증에 실패" in res.text:
+            print("[LAW] ❌ IP 미승인 상태 (등록한 IP가 반영되지 않았거나 다름)")
+        elif "<law" in res.text:
+            print("[LAW] ✅ 인증 성공! 데이터 수집 가능")
+        else:
+            print(f"[LAW] ⚠️ 응답 확인 필요 (내용 일부): {res.text[:200]}")
+    except Exception as e:
+        print(f"[LAW] ❌ 에러: {e}")
+def test_alio():
+    key = os.getenv("DATA_GO_KR_API_KEY")
+    # Decoding 키 사용
+    url = "https://apis.data.go.kr/1051000/public_inst/list"
+    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
+    try:
+        res = requests.get(url, params=params, timeout=10)
+        print(f"[ALIO] Status: {res.status_code}")
+        if res.status_code == 200:
+            if "SERVICE_KEY_IS_NOT_REGISTERED" in res.text:
+                print("[ALIO] ❌ 키 미활성 상태 (동기화 대기 중)")
+            elif "INVALID_REQUEST_PARAMETER_ERROR" in res.text:
+                print("[ALIO] ❌ 파라미터 오류")
+            else:
+                try:
+                    data = res.json()
+                    # 결과 코드 확인
+                    res_code = data.get("response", {}).get("header", {}).get("resultCode")
+                    if res_code == "00":
+                        print("[ALIO] ✅ 인증 성공! 데이터 수집 가능")
+                    else:
+                        print(f"[ALIO] ❌ 결과 오류 (코드: {res_code})")
+                except:
+                    print(f"[ALIO] ⚠️ 비정상 응답 (내용 일부): {res.text[:200]}")
+        else:
+            print(f"[ALIO] ❌ HTTP 오류: {res.status_code}")
+    except Exception as e:
+        print(f"[ALIO] ❌ 연결 에러: {e}")
+if __name__ == "__main__":
+    print("-" * 50)
+    print("🚀 API 최종 유효성 검사 시작")
+    test_law()
+    test_alio()
+    print("-" * 50)

scripts/govon-bootstrap.sh ADDED Viewed

	@@ -0,0 +1,287 @@

+#!/usr/bin/env bash
+# GovOn daemon bootstrap script
+# Usage: ./scripts/govon-bootstrap.sh [start|stop|status|health]
+#
+# 환경변수:
+#   GOVON_HOME   — GovOn 홈 디렉터리 (기본: ~/.govon)
+#   GOVON_PORT   — daemon 포트 (기본: 8000)
+#   SKIP_MODEL_LOAD — 모델 로드 건너뛰기 (경고 표시됨)
+set -euo pipefail
+PYTHON_CMD=""
+# ---------------------------------------------------------------------------
+# 설정
+# ---------------------------------------------------------------------------
+GOVON_HOME="${GOVON_HOME:-$HOME/.govon}"
+GOVON_PORT="${GOVON_PORT:-8000}"
+HEALTH_URL="http://127.0.0.1:${GOVON_PORT}/health"
+PID_FILE="${GOVON_HOME}/daemon.pid"
+LOG_FILE="${GOVON_HOME}/daemon.log"
+# ---------------------------------------------------------------------------
+# 색상 출력 헬퍼
+# ---------------------------------------------------------------------------
+_info()    { echo "[INFO]  $*"; }
+_warn()    { echo "[WARN]  $*" >&2; }
+_error()   { echo "[ERROR] $*" >&2; }
+_success() { echo "[OK]    $*"; }
+# ---------------------------------------------------------------------------
+# Pre-flight 검사
+# ---------------------------------------------------------------------------
+_preflight_checks() {
+    # SKIP_MODEL_LOAD 경고
+    if [ "${SKIP_MODEL_LOAD:-}" = "true" ] || [ "${SKIP_MODEL_LOAD:-}" = "1" ]; then
+        _warn "SKIP_MODEL_LOAD가 설정되어 있습니다. 모델이 로드되지 않으며 일부 기능이 비활성화됩니다."
+    fi
+    # GPU 감지 경고
+    if command -v nvidia-smi &>/dev/null; then
+        if ! nvidia-smi &>/dev/null 2>&1; then
+            _warn "nvidia-smi 실행에 실패했습니다. GPU를 사용할 수 없을 수 있습니다."
+        fi
+    else
+        _warn "nvidia-smi를 찾을 수 없습니다. CPU 전용 모드로 실행됩니다. (성능이 크게 저하될 수 있습니다)"
+    fi
+}
+# ---------------------------------------------------------------------------
+# Python / govon 설치 확인
+# ---------------------------------------------------------------------------
+_check_python() {
+    if ! command -v python3 &>/dev/null && ! command -v python &>/dev/null; then
+        _error "Python을 찾을 수 없습니다. Python 3.10 이상을 설치하세요."
+        exit 1
+    fi
+    PYTHON_CMD="$(command -v python3 || command -v python)"
+    _info "Python: $("$PYTHON_CMD" --version 2>&1)"
+}
+# Decide how govon can be invoked and record it in the global GOVON_CMD:
+# the `govon` executable on PATH if present, otherwise the src.cli.shell module
+# importable by $PYTHON_CMD. Exits 1 when neither is available.
+# Requires _check_python to have set PYTHON_CMD first.
+_check_govon() {
+    if command -v govon &>/dev/null; then
+        GOVON_CMD="govon"
+        _info "govon 명령어 발견: $(command -v govon)"
+    # PYTHON_CMD is a single path; quoting keeps a path with spaces intact.
+    elif "$PYTHON_CMD" -c "import src.cli.shell" 2>/dev/null; then
+        GOVON_CMD="$PYTHON_CMD -m src.cli.shell"
+        _info "govon 모듈(src.cli.shell) 발견"
+    else
+        _error "govon이 설치되어 있지 않습니다. 'pip install govon[cli]' 또는 'pip install -e .[cli]'를 실행하세요."
+        exit 1
+    fi
+}
+# ---------------------------------------------------------------------------
+# PID 유틸리티
+# ---------------------------------------------------------------------------
+_read_pid() {
+    if [ -f "$PID_FILE" ]; then
+        awk '{print $1}' "$PID_FILE" 2>/dev/null || echo ""
+    fi
+}
+_pid_alive() {
+    local pid="$1"
+    [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
+}
+# ---------------------------------------------------------------------------
+# health 확인
+# ---------------------------------------------------------------------------
+# Succeed when $HEALTH_URL answers with a non-error HTTP status within 5 seconds.
+# All curl output is discarded; only the exit status matters.
+_health_check() {
+    curl --silent --fail --max-time 5 "$HEALTH_URL" >/dev/null 2>&1
+}
+# ---------------------------------------------------------------------------
+# 명령: start
+# ---------------------------------------------------------------------------
+# Start the GovOn daemon in the background and wait until /health answers.
+#
+# Exits 0 when the daemon is already running and healthy, or once the new
+# process passes its health check. Exits 1 when the process dies during startup
+# or the health check does not pass within 120 one-second polls.
+# Writes "<pid> <epoch>" to $PID_FILE and appends daemon output to $LOG_FILE.
+cmd_start() {
+    _preflight_checks
+    _check_python
+    _check_govon
+    # Already running?
+    local existing_pid
+    existing_pid="$(_read_pid)"
+    if _pid_alive "$existing_pid" && _health_check; then
+        _success "GovOn daemon이 이미 실행 중입니다. (PID=$existing_pid, 포트=$GOVON_PORT)"
+        exit 0
+    fi
+    mkdir -p "$GOVON_HOME"
+    _info "GovOn daemon을 시작합니다. (포트=$GOVON_PORT, 로그=$LOG_FILE)"
+    # Use `govon --start` only when the CLI advertises it; otherwise run uvicorn
+    # directly. The help text is captured first instead of piped into `grep -q`:
+    # under `set -o pipefail` an early-exiting grep (SIGPIPE on the writer) or a
+    # non-zero `--help` exit status would make the pipeline look like a miss.
+    local use_govon_start=false
+    if [ "$GOVON_CMD" = "govon" ]; then
+        local help_text
+        help_text="$(govon --help 2>&1 || true)"
+        if grep -q -- "--start" <<<"$help_text"; then
+            use_govon_start=true
+        fi
+    fi
+    if [ "$use_govon_start" = true ]; then
+        govon --start >> "$LOG_FILE" 2>&1 &
+    else
+        # PYTHON_CMD is a single path; quoting keeps a path with spaces intact.
+        "$PYTHON_CMD" -m uvicorn src.inference.api_server:app \
+            --host 127.0.0.1 \
+            --port "$GOVON_PORT" >> "$LOG_FILE" 2>&1 &
+    fi
+    local daemon_pid=$!
+    echo "$daemon_pid $(date +%s)" > "$PID_FILE"
+    _info "daemon PID=$daemon_pid 기록 완료."
+    # Fast-fail: has the process already exited two seconds after launch?
+    sleep 2
+    if ! kill -0 "$daemon_pid" 2>/dev/null; then
+        _error "daemon이 기동 직후 종료되었습니다. 로그를 확인하세요: $LOG_FILE"
+        rm -f "$PID_FILE"
+        exit 1
+    fi
+    # Wait for the health check (up to 120 polls, one second apart).
+    local elapsed=0
+    local max_wait=120
+    _info "health check 대기 중..."
+    while [ "$elapsed" -lt "$max_wait" ]; do
+        if _health_check; then
+            _success "GovOn daemon 기동 완료. (PID=$daemon_pid, 포트=$GOVON_PORT)"
+            exit 0
+        fi
+        # Same liveness assumption as the fast-fail check above: if the process
+        # is gone there is nothing left to wait for, so stop instead of polling
+        # for the full timeout.
+        if ! kill -0 "$daemon_pid" 2>/dev/null; then
+            _error "daemon이 기동 중 종료되었습니다. 로그를 확인하세요: $LOG_FILE"
+            rm -f "$PID_FILE"
+            exit 1
+        fi
+        sleep 1
+        elapsed=$((elapsed + 1))
+    done
+    _error "health check timeout (${max_wait}s). 로그를 확인하세요: $LOG_FILE"
+    exit 1
+}
+# ---------------------------------------------------------------------------
+# 명령: stop
+# ---------------------------------------------------------------------------
+cmd_stop() {
+    local pid
+    pid="$(_read_pid)"
+    if [ -z "$pid" ]; then
+        _warn "PID 파일이 없습니다. daemon이 실행 중이 아닌 것으로 간주합니다."
+        exit 0
+    fi
+    if ! _pid_alive "$pid"; then
+        _warn "PID=$pid 프로세스가 없습니다. PID 파일을 제거합니다."
+        rm -f "$PID_FILE"
+        exit 0
+    fi
+    # govon CLI --stop 지원 여부 확인
+    if command -v govon &>/dev/null && govon --help 2>&1 | grep -q -- "--stop" 2>/dev/null; then
+        govon --stop
+    else
+        _info "SIGTERM 전송: PID=$pid"
+        kill -TERM "$pid"
+        local elapsed=0
+        while [ $elapsed -lt 10 ]; do
+            if ! _pid_alive "$pid"; then
+                _success "GovOn daemon이 정상 종료되었습니다. (PID=$pid)"
+                rm -f "$PID_FILE"
+                exit 0
+            fi
+            sleep 1
+            elapsed=$((elapsed + 1))
+        done
+        _warn "timeout — SIGKILL 전송: PID=$pid"
+        kill -KILL "$pid" 2>/dev/null || true
+        rm -f "$PID_FILE"
+        _success "GovOn daemon이 강제 종료되었습니다. (PID=$pid)"
+    fi
+}
+# ---------------------------------------------------------------------------
+# 명령: status
+# ---------------------------------------------------------------------------
+cmd_status() {
+    local pid
+    pid="$(_read_pid)"
+    if [ -z "$pid" ]; then
+        echo "GovOn daemon: 중지됨 (PID 파일 없음)"
+        exit 1
+    fi
+    if ! _pid_alive "$pid"; then
+        echo "GovOn daemon: 중지됨 (PID=$pid — 프로세스 없음)"
+        rm -f "$PID_FILE"
+        exit 1
+    fi
+    if _health_check; then
+        echo "GovOn daemon: 실행 중 (PID=$pid, 포트=$GOVON_PORT)"
+        exit 0
+    else
+        echo "GovOn daemon: 프로세스는 살아 있지만 health check 실패 (PID=$pid, URL=$HEALTH_URL)"
+        exit 2
+    fi
+}
+# ---------------------------------------------------------------------------
+# 명령: health
+# ---------------------------------------------------------------------------
+cmd_health() {
+    _info "GET $HEALTH_URL"
+    if curl -sf --max-time 10 "$HEALTH_URL"; then
+        echo ""
+        _success "health check 통과."
+        exit 0
+    else
+        _error "health check 실패. daemon이 실행 중인지 확인하세요."
+        exit 1
+    fi
+}
+# ---------------------------------------------------------------------------
+# 진입점
+# ---------------------------------------------------------------------------
+COMMAND="${1:-help}"
+case "$COMMAND" in
+    start)
+        cmd_start
+        ;;
+    stop)
+        cmd_stop
+        ;;
+    status)
+        cmd_status
+        ;;
+    health)
+        cmd_health
+        ;;
+    help|--help|-h)
+        echo "사용법: $0 [start|stop|status|health]"
+        echo ""
+        echo "명령어:"
+        echo "  start   — GovOn daemon을 기동합니다"
+        echo "  stop    — GovOn daemon을 중지합니다"
+        echo "  status  — daemon 실행 상태를 확인합니다"
+        echo "  health  — /health 엔드포인트를 probe합니다"
+        echo ""
+        echo "환경변수:"
+        echo "  GOVON_HOME=$GOVON_HOME"
+        echo "  GOVON_PORT=$GOVON_PORT"
+        echo "  SKIP_MODEL_LOAD (설정 시 경고 표시)"
+        exit 0
+        ;;
+    *)
+        _error "알 수 없는 명령: $COMMAND"
+        echo "사용법: $0 [start|stop|status|health]"
+        exit 1
+        ;;
+esac

scripts/offline-deploy.sh ADDED Viewed

	@@ -0,0 +1,128 @@

+#!/usr/bin/env bash
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+IMAGE_FILE="${PROJECT_DIR}/govon-image.tar.gz"
+ENV_TEMPLATE="${PROJECT_DIR}/.env.airgap.example"
+ENV_FILE="${PROJECT_DIR}/.env"
+API_KEY_PLACEHOLDER="CHANGE_ME_TO_SECURE_RANDOM_KEY"
+BM25_INDEX_HMAC_KEY_PLACEHOLDER="CHANGE_ME_TO_SECURE_HMAC_KEY"
+extract_env_value() {
+    local key="$1"
+    local file="$2"
+    awk -F= -v key="$key" '
+        $0 ~ "^[[:space:]]*" key "=" {
+            sub(/^[^=]*=/, "", $0)
+            print $0
+            exit
+        }
+    ' "$file"
+}
+# Exit 1 with an explanation unless $ENV_FILE defines the key with a value that
+# is non-empty and different from the shipped placeholder.
+#   $1 - variable name
+#   $2 - placeholder value that must have been replaced
+require_secure_env_value() {
+    local key="$1"
+    local placeholder="$2"
+    local value
+    value="$(extract_env_value "$key" "$ENV_FILE")"
+    if [ -n "$value" ] && [ "$value" != "$placeholder" ]; then
+        return 0
+    fi
+    echo "[ERROR] ${key} 값이 비어 있거나 예시 placeholder 그대로입니다."
+    echo "        ${ENV_FILE}에서 ${key}를 안전한 임의 문자열로 수정한 뒤 다시 실행하세요."
+    exit 1
+}
+echo "=== GovOn 오프라인 배포 스크립트 ==="
+# 1. Check that Docker is installed
+if ! command -v docker &>/dev/null; then
+    echo "[ERROR] Docker가 설치되어 있지 않습니다."
+    echo "설치 가이드: https://docs.docker.com/engine/install/"
+    exit 1
+fi
+echo "[OK] Docker: $(docker --version)"
+# 2. Check that the Docker Compose plugin is available
+if ! docker compose version &>/dev/null; then
+    echo "[ERROR] Docker Compose가 설치되어 있지 않습니다."
+    exit 1
+fi
+echo "[OK] Docker Compose: $(docker compose version --short)"
+# 3. Check for the NVIDIA Container Toolkit (warning only, never fatal)
+if docker info 2>/dev/null | grep -q "Runtimes.*nvidia"; then
+    echo "[OK] NVIDIA Container Toolkit 감지됨"
+else
+    echo "[WARNING] NVIDIA Container Toolkit이 감지되지 않았습니다."
+    echo "GPU 가속이 필요합니다: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
+fi
+# 4. Verify the image archive exists, then load it into Docker
+if [ ! -f "$IMAGE_FILE" ]; then
+    echo "[ERROR] 이미지 파일을 찾을 수 없습니다: $IMAGE_FILE"
+    exit 1
+fi
+echo "Docker 이미지 로드 중... (시간이 소요될 수 있습니다)"
+gunzip -c "$IMAGE_FILE" | docker load
+echo "[OK] 이미지 로드 완료"
+# 5. Prepare the env file: seed it from the template when it does not exist yet.
+# The script keeps going after seeding; the placeholder checks below then stop
+# the run until the copied values have been edited.
+if [ ! -f "$ENV_FILE" ] && [ -f "$ENV_TEMPLATE" ]; then
+    cp "$ENV_TEMPLATE" "$ENV_FILE"
+    echo "[OK] .env 파일을 .env.airgap.example 기준으로 생성했습니다."
+    echo "     API_KEY, BM25_INDEX_HMAC_KEY, CORS_ORIGINS 등을 수정한 뒤 재실행하세요."
+fi
+# Hint about MODEL_PATH; only reachable when neither .env nor the template exists.
+if [ -z "${MODEL_PATH:-}" ] && [ ! -f "$ENV_FILE" ]; then
+    echo "[INFO] MODEL_PATH가 설정되지 않았습니다."
+    echo "  오프라인 환경에서는 컨테이너 내부 경로를 지정하세요:"
+    echo "  export MODEL_PATH=/app/models/EXAONE-4.0-32B-AWQ"
+fi
+if [ ! -f "$ENV_FILE" ]; then
+    echo "[ERROR] 환경변수 파일을 찾을 수 없습니다: $ENV_FILE"
+    exit 1
+fi
+# Refuse to continue while either secret is empty or still the template placeholder.
+require_secure_env_value "API_KEY" "$API_KEY_PLACEHOLDER"
+require_secure_env_value "BM25_INDEX_HMAC_KEY" "$BM25_INDEX_HMAC_KEY_PLACEHOLDER"
+# 6. 볼륨 디렉토리 생성
+echo "볼륨 디렉토리 생성 중..."
+mkdir -p \
+    "${PROJECT_DIR}/models" \
+    "${PROJECT_DIR}/data" \
+    "${PROJECT_DIR}/agents" \
+    "${PROJECT_DIR}/configs" \
+    "${PROJECT_DIR}/logs" \
+    "${PROJECT_DIR}/.cache"
+echo "[OK] 볼륨 디렉토리 준비 완료"
+# 7. 컨테이너 실행
+echo "컨테이너 시작 중..."
+docker compose --env-file "${ENV_FILE}" -f "${PROJECT_DIR}/docker-compose.offline.yml" up -d
+echo "[OK] 컨테이너 시작됨"
+# 8. 헬스체크 대기
+echo "서버 시작 대기 중... (최대 120초)"
+for i in $(seq 1 24); do
+    if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
+        echo ""
+        echo "=============================="
+        echo "[SUCCESS] GovOn 서버가 정상 시작되었습니다!"
+        echo "API 주소: http://localhost:8000"
+        echo "헬스체크: http://localhost:8000/health"
+        echo "=============================="
+        exit 0
+    fi
+    printf "."
+    sleep 5
+done
+echo ""
+echo "[ERROR] 서버 시작 실패 (120초 타임아웃)"
+echo "로그 확인: docker compose --env-file ${ENV_FILE} -f ${PROJECT_DIR}/docker-compose.offline.yml logs"
+exit 1

scripts/smoke-test.sh ADDED Viewed

	@@ -0,0 +1,46 @@

+#!/usr/bin/env bash
+set -euo pipefail
+BASE_URL="${1:-http://localhost:8000}"
+echo "=== GovOn Smoke Test ==="
+echo "대상: $BASE_URL"
+echo ""
+PASS=0
+FAIL=0
+# Test 1: Health check
+echo -n "[TEST] GET /health ... "
+HEALTH_RESPONSE=$(curl -sf "${BASE_URL}/health" 2>/dev/null) || { echo "FAIL (연결 실패)"; FAIL=$((FAIL+1)); }
+if [ -n "${HEALTH_RESPONSE:-}" ]; then
+    STATUS=$(echo "$HEALTH_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+    if [ "$STATUS" = "healthy" ]; then
+        echo "PASS"
+        PASS=$((PASS+1))
+    else
+        echo "FAIL (status: ${STATUS:-unknown})"
+        FAIL=$((FAIL+1))
+    fi
+fi
+# Test 2: Health response structure
+echo -n "[TEST] /health 응답 구조 ... "
+if echo "$HEALTH_RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'status' in d" 2>/dev/null; then
+    echo "PASS"
+    PASS=$((PASS+1))
+else
+    echo "FAIL"
+    FAIL=$((FAIL+1))
+fi
+echo ""
+echo "=============================="
+echo "결과: PASS=${PASS}, FAIL=${FAIL}"
+if [ "$FAIL" -gt 0 ]; then
+    echo "상태: FAILED"
+    exit 1
+else
+    echo "상태: PASSED"
+    exit 0
+fi

scripts/test_alio_only.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import os
+import requests
+def test_alio_api():
+    key = os.getenv("DATA_GO_KR_API_KEY")
+    # Decoding 키를 직접 사용하여 requests가 인코딩하도록 위임
+    url = "https://apis.data.go.kr/1051000/public_inst/list"
+    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
+    try:
+        res = requests.get(url, params=params, timeout=10)
+        print(f"HTTP Status: {res.status_code}")
+        if res.status_code == 200:
+            if "<ServiceKey Error" in res.text:
+                print("❌ 인증키 오류 (ServiceKey Error)")
+                return False
+            try:
+                data = res.json()
+                header = data.get("response", {}).get("header", {})
+                code = header.get("resultCode")
+                msg = header.get("resultMsg")
+                if code == "00":
+                    print(f"✅ ALIO API 유효함! (결과코드: {code})")
+                    return True
+                else:
+                    print(f"❌ 인증 오류 발생 (코드: {code}, 메시지: {msg})")
+                    return False
+            except Exception as e:
+                print(f"⚠️ JSON 파싱 실패 또는 비정상 응답: {res.text[:200]}")
+                return False
+        else:
+            print(f"❌ HTTP 요청 실패 (Status: {res.status_code})")
+            return False
+    except Exception as e:
+        print(f"❌ 연결 실패: {e}")
+        return False
+if __name__ == "__main__":
+    test_alio_api()

scripts/test_api_keys.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import os
+import sys
+import requests
+def test_law_api():
+    oc = os.getenv("LAW_GO_KR_OC")
+    url = "http://www.law.go.kr/DRF/lawSearch.do"
+    params = {"target": "law", "query": "민원", "type": "XML", "OC": oc}
+    try:
+        res = requests.get(url, params=params, timeout=10)
+        if res.status_code == 200 and "<law" in res.text:
+            print(f"[LAW API] ✅ 유효함 (상태코드: 200)")
+            return True
+        else:
+            print(f"[LAW API] ❌ 오류 (상태코드: {res.status_code})")
+            print(f"응답내용 일부: {res.text[:200]}")
+            return False
+    except Exception as e:
+        print(f"[LAW API] ❌ 연결 실패: {e}")
+        return False
+def test_alio_api():
+    key = os.getenv("DATA_GO_KR_API_KEY")
+    # Decoding 키를 사용하기 때문에 requests가 한 번 더 인코딩하도록 함
+    url = "https://apis.data.go.kr/1051000/public_inst/list"
+    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
+    try:
+        res = requests.get(url, params=params, timeout=10)
+        if res.status_code == 200:
+            try:
+                data = res.json()
+                code = data.get("response", {}).get("header", {}).get("resultCode")
+                if code == "00":
+                    print(f"[ALIO API] ✅ 유효함 (상태코드: 200, 결과코드: 00)")
+                    return True
+                else:
+                    msg = (
+                        data.get("response", {})
+                        .get("header", {})
+                        .get("resultMsg", "알 수 없는 오류")
+                    )
+                    print(f"[ALIO API] ❌ 인증 오류 (결과코드: {code}, 메시지: {msg})")
+                    return False
+            except Exception:
+                if "<ServiceKey Error" in res.text:
+                    print("[ALIO API] ❌ 인증키 오류 (ServiceKey Error)")
+                else:
+                    print(f"[ALIO API] ❌ 비정상 응답: {res.text[:200]}")
+                return False
+        else:
+            print(f"[ALIO API] ❌ HTTP 오류 (상태코드: {res.status_code})")
+            return False
+    except Exception as e:
+        print(f"[ALIO API] ❌ 연결 실패: {e}")
+        return False
+if __name__ == "__main__":
+    print("-" * 50)
+    print("🚀 API 키 유효성 검사 시작")
+    law_ok = test_law_api()
+    alio_ok = test_alio_api()
+    print("-" * 50)
+    if law_ok and alio_ok:
+        print("✨ 모든 API 키가 정상적으로 작동합니다!")
+    else:
+        print("⚠️ 일부 API 키에 확인이 필요합니다.")
+        sys.exit(1)

scripts/test_law_https.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+import requests
+def test_law_https():
+    oc = os.getenv("LAW_GO_KR_OC")
+    # HTTPS 주소로 시도
+    url = f"https://www.law.go.kr/DRF/lawSearch.do?target=law&query=민원&type=XML&OC={oc}"
+    try:
+        res = requests.get(url, timeout=15)
+        print(f"URL: {url}")
+        print(f"Status: {res.status_code}")
+        if "사용자 정보 검증에 실패" in res.text:
+            print("❌ HTTPS로도 IP 인증 실패")
+        elif "<law" in res.text:
+            print("✅ HTTPS 호출 성공!")
+        else:
+            print(f"⚠️ 응답 확인 필요: {res.text[:200]}")
+    except Exception as e:
+        print(f"❌ 에러: {e}")
+if __name__ == "__main__":
+    test_law_https()

scripts/verify_e2e_tool_calling.py ADDED Viewed

	@@ -0,0 +1,1645 @@

+#!/usr/bin/env python3
+"""GovOn Native Tool Calling + AdapterRegistry E2E 검증 스크립트.
+HuggingFace Space에 배포된 govon-runtime 서버에 대해
+에이전트 파이프라인(플래너 → 도구 실행 → 어댑터 전환)을 검증한다.
+사용법:
+    GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_e2e_tool_calling.py
+    GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_e2e_tool_calling.py
+5-Phase 검증 (13 시나리오):
+    Phase 1: Infrastructure (hard gate)
+        1. Health & Profile
+        2. Base Model Generation
+        3. Adapter Registry
+    Phase 2: Agent Pipeline Core
+        4. Planner Produces Valid Plan
+        5. Civil LoRA Draft Response
+        6. Legal LoRA Evidence Augmentation (depends on 5)
+        7. Task Type Classification
+    Phase 3: data.go.kr API Tools (soft gate)
+        8. External API Tool Invocation (4 sub-cases)
+    Phase 4: Adapter Dynamics
+        9. Sequential Adapter Switching
+        10. LoRA ID Consistency
+    Phase 5: Robustness
+        11. Empty Query Handling
+        12. Reject Flow Completeness
+        13. Concurrent Request Isolation
+"""
+# stdlib
+import asyncio
+import json
+import logging
+import os
+import re
+import sys
+import time
+from typing import Any, Optional
+from uuid import uuid4
+# Root URL of the runtime under test; a trailing "/" is stripped so paths can be appended directly.
+BASE_URL = os.environ.get("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
+# Optional API key; when set it is sent as the X-API-Key request header.
+API_KEY = os.environ.get("API_KEY")
+TIMEOUT = 300  # maximum wait per scenario (seconds)
+# Model id sent in the /v1/completions request body.
+BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
+# Presumably the output file for the collected results — TODO confirm against the writer.
+RESULTS_PATH = "verify_e2e_tool_calling_results.json"
+# Whitelist of tool names a plan may contain; the planner scenario rejects anything else.
+VALID_TOOLS = frozenset(
+    {
+        "rag_search",
+        "api_lookup",
+        "draft_civil_response",
+        "append_evidence",
+        "issue_detector",
+        "stats_lookup",
+        "keyword_analyzer",
+        "demographics_lookup",
+    }
+)
+# Regexes applied with re.search to decide whether a response cites legal material.
+# NOTE(review): the single-character patterns r"법" and r"령" also match inside
+# ordinary words, so any match is weak evidence — consider tightening.
+LEGAL_PATTERNS = [
+    r"제\s*\d+\s*조",
+    r"제\s*\d+\s*항",
+    r"법률",
+    r"시행령",
+    r"조례",
+    r"판례",
+    r"대법원",
+    r"법",
+    r"령",
+    r"규정",
+]
+logger = logging.getLogger(__name__)
+# Result entries appended by _record(), one dict per recorded scenario.
+_results: list[dict] = []
+# Union of every tool name seen in a scenario's planned_tools.
+_observed_tools: set[str] = set()
+# Random hex identifier generated once at import time.
+_run_id = uuid4().hex
+# ---------------------------------------------------------------------------
+# HTTP 클라이언트 레이어 (httpx 우선, urllib fallback)
+# ---------------------------------------------------------------------------
+try:
+    import httpx
+    _HTTP_BACKEND = "httpx"
+    def _build_headers() -> dict:
+        h = {"Content-Type": "application/json", "Accept": "application/json"}
+        if API_KEY:
+            h["X-API-Key"] = API_KEY
+        return h
+    async def http_get(path: str, timeout: float = TIMEOUT) -> tuple[int, dict]:
+        url = BASE_URL + path
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            resp = await client.get(url, headers=_build_headers())
+            try:
+                return resp.status_code, resp.json()
+            except Exception:
+                return resp.status_code, {"_raw": resp.text[:200]}
+    async def http_post(path: str, body: dict, timeout: float = TIMEOUT) -> tuple[int, dict]:
+        url = BASE_URL + path
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            resp = await client.post(url, json=body, headers=_build_headers())
+            try:
+                return resp.status_code, resp.json()
+            except Exception:
+                return resp.status_code, {"_raw": resp.text[:200]}
+    async def http_post_sse(
+        path: str, body: dict, timeout: float = TIMEOUT
+    ) -> tuple[int, list[dict]]:
+        """SSE 스트리밍 POST. 청크를 수집하여 파싱된 이벤트 목록을 반환한다."""
+        url = BASE_URL + path
+        h = _build_headers()
+        h["Accept"] = "text/event-stream"
+        events: list[dict] = []
+        status_code = 0
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            async with client.stream("POST", url, json=body, headers=h) as resp:
+                status_code = resp.status_code
+                async for line in resp.aiter_lines():
+                    line = line.strip()
+                    if not line.startswith("data:"):
+                        continue
+                    payload = line[len("data:") :].strip()
+                    if not payload:
+                        continue
+                    try:
+                        events.append(json.loads(payload))
+                    except json.JSONDecodeError:
+                        events.append({"_raw": payload})
+        return status_code, events
+    async def http_get_raw(url: str, timeout: float = 10) -> tuple[int, str]:
+        """Raw GET for external connectivity checks."""
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            resp = await client.get(url)
+            return resp.status_code, resp.text[:200]
+except ImportError:
+    import urllib.error
+    import urllib.request
+    _HTTP_BACKEND = "urllib"
+    def _build_headers() -> dict:
+        h = {"Content-Type": "application/json", "Accept": "application/json"}
+        if API_KEY:
+            h["X-API-Key"] = API_KEY
+        return h
+    async def http_get(path: str, timeout: float = TIMEOUT) -> tuple[int, dict]:
+        url = BASE_URL + path
+        req = urllib.request.Request(url, headers=_build_headers(), method="GET")
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as r:
+                return r.status, json.loads(r.read().decode())
+        except urllib.error.HTTPError as e:
+            return e.code, {}
+    async def http_post(path: str, body: dict, timeout: float = TIMEOUT) -> tuple[int, dict]:
+        url = BASE_URL + path
+        data = json.dumps(body).encode()
+        req = urllib.request.Request(url, data=data, headers=_build_headers(), method="POST")
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as r:
+                return r.status, json.loads(r.read().decode())
+        except urllib.error.HTTPError as e:
+            return e.code, {}
+    async def http_post_sse(
+        path: str, body: dict, timeout: float = TIMEOUT
+    ) -> tuple[int, list[dict]]:
+        """urllib fallback: SSE 스트리밍을 동기 방식으로 읽는다."""
+        url = BASE_URL + path
+        data = json.dumps(body).encode()
+        h = _build_headers()
+        h["Accept"] = "text/event-stream"
+        req = urllib.request.Request(url, data=data, headers=h, method="POST")
+        events: list[dict] = []
+        status_code = 0
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as r:
+                status_code = r.status
+                for raw_line in r:
+                    line = raw_line.decode("utf-8", errors="replace").strip()
+                    if not line.startswith("data:"):
+                        continue
+                    payload = line[len("data:") :].strip()
+                    if not payload:
+                        continue
+                    try:
+                        events.append(json.loads(payload))
+                    except json.JSONDecodeError:
+                        events.append({"_raw": payload})
+        except urllib.error.HTTPError as e:
+            status_code = e.code
+        return status_code, events
+    async def http_get_raw(url: str, timeout: float = 10) -> tuple[int, str]:
+        """Raw GET for external connectivity checks."""
+        req = urllib.request.Request(url, method="GET")
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as r:
+                return r.status, r.read().decode()[:200]
+        except urllib.error.HTTPError as e:
+            return e.code, ""
+        except Exception:
+            return 0, ""
+# ---------------------------------------------------------------------------
+# 결과 기록 / 출력 헬퍼
+# ---------------------------------------------------------------------------
+def _record(
+    scenario_num: int,
+    name: str,
+    phase: int,
+    status: str,
+    elapsed: float,
+    attempts: int = 1,
+    assertions: Optional[list[str]] = None,
+    warnings: Optional[list[str]] = None,
+    error: Optional[str] = None,
+    detail: Optional[Any] = None,
+) -> dict:
+    tag = {"passed": "[PASS]", "failed": "[FAIL]", "skipped": "[SKIP]"}.get(status, "[????]")
+    suffix = f"({elapsed:.2f}s)"
+    if status == "passed":
+        print(f"{tag} Scenario {scenario_num}: {name} {suffix}")
+    elif status == "skipped":
+        print(f"{tag} Scenario {scenario_num}: {name} — {error or 'skipped'} {suffix}")
+    else:
+        print(f"{tag} Scenario {scenario_num}: {name} — {error} {suffix}")
+    if warnings:
+        for w in warnings:
+            print(f"  [WARN] {w}")
+    entry = {
+        "id": scenario_num,
+        "name": name,
+        "phase": phase,
+        "status": status,
+        "attempts": attempts,
+        "elapsed_s": round(elapsed, 3),
+        "assertions": assertions or [],
+        "warnings": warnings or [],
+        "error": error,
+        "detail": detail,
+    }
+    _results.append(entry)
+    return entry
+def _session_id(scenario_num: int) -> str:
+    return f"e2e-{scenario_num}-{uuid4().hex[:8]}"
+def _extract_text_from_events(events: list[dict]) -> str:
+    """SSE 이벤트 목록에서 최종 텍스트를 추출한다."""
+    for ev in reversed(events):
+        if ev.get("node") == "synthesis" and ev.get("final_text"):
+            return ev["final_text"]
+    for ev in reversed(events):
+        if ev.get("finished") and ev.get("text"):
+            return ev["text"]
+    chunks = [ev.get("text", "") or ev.get("final_text", "") for ev in events]
+    return "".join(c for c in chunks if c)
+def _contains_legal_keyword(text: str) -> bool:
+    return any(re.search(pattern, text) for pattern in LEGAL_PATTERNS)
+# ---------------------------------------------------------------------------
+# Agent 호출 헬퍼: _call_agent_with_approval()
+# ---------------------------------------------------------------------------
+async def _call_agent_with_approval(
+    query: str,
+    session_id: str,
+    approve: bool = True,
+    timeout: float = 180,
+) -> tuple[bool, str, dict, Optional[str]]:
+    """에이전트 SSE 스트리밍으로 호출 → awaiting_approval까지 파싱 → approve/reject.
+    Returns: (success, text, metadata_dict, error)
+    metadata_dict keys: planned_tools, task_type, tool_results, adapter_mode, tool_args
+    """
+    body = {"query": query, "session_id": session_id, "use_rag": False}
+    meta: dict[str, Any] = {
+        "planned_tools": [],
+        "task_type": None,
+        "tool_results": {},
+        "adapter_mode": None,
+        "tool_args": {},
+    }
+    # --- SSE 스트리밍 시도 ---
+    try:
+        status_code, events = await http_post_sse("/v2/agent/stream", body, timeout=timeout)
+        if status_code != 200:
+            raise RuntimeError(f"SSE HTTP {status_code}")
+        # awaiting_approval 또는 __interrupt__ 이벤트 탐색
+        awaiting = None
+        for ev in events:
+            if ev.get("status") == "awaiting_approval" or ev.get("node") == "__interrupt__":
+                awaiting = ev
+                break
+            # 플래너 노드에서 planned_tools 추출
+            if ev.get("planned_tools"):
+                meta["planned_tools"] = ev["planned_tools"]
+            if ev.get("task_type"):
+                meta["task_type"] = ev["task_type"]
+            if ev.get("adapter_mode"):
+                meta["adapter_mode"] = ev["adapter_mode"]
+            if ev.get("tool_args"):
+                meta["tool_args"] = ev["tool_args"]
+        if awaiting:
+            # awaiting 이벤트에서 메타데이터 추출
+            if awaiting.get("planned_tools"):
+                meta["planned_tools"] = awaiting["planned_tools"]
+            if awaiting.get("task_type"):
+                meta["task_type"] = awaiting["task_type"]
+            if awaiting.get("adapter_mode"):
+                meta["adapter_mode"] = awaiting["adapter_mode"]
+            if awaiting.get("tool_args"):
+                meta["tool_args"] = awaiting["tool_args"]
+            thread_id = awaiting.get("thread_id") or session_id
+            # approve/reject
+            approve_code, approve_resp = await http_post(
+                f"/v2/agent/approve?thread_id={thread_id}&approved={'true' if approve else 'false'}",
+                {},
+                timeout=timeout,
+            )
+            if approve_code != 200:
+                return False, "", meta, f"approve HTTP {approve_code}: {approve_resp}"
+            # approve 응답에서 최종 텍스트 및 도구 결과 추출
+            final_text = approve_resp.get("text", "") or approve_resp.get("final_text", "") or ""
+            if approve_resp.get("tool_results"):
+                meta["tool_results"] = approve_resp["tool_results"]
+            if approve_resp.get("adapter_mode"):
+                meta["adapter_mode"] = approve_resp["adapter_mode"]
+            if approve_resp.get("status") == "rejected":
+                return True, final_text, meta, None
+            if final_text:
+                return True, final_text, meta, None
+            return False, "", meta, f"approve 200 but text 없음: {approve_resp}"
+        # awaiting 이벤트 없이 최종 텍스트가 있는 경우 (auto-approve 모드)
+        text = _extract_text_from_events(events)
+        # 이벤트에서 추가 메타데이터 수집
+        for ev in events:
+            if ev.get("planned_tools") and not meta["planned_tools"]:
+                meta["planned_tools"] = ev["planned_tools"]
+            if ev.get("task_type") and not meta["task_type"]:
+                meta["task_type"] = ev["task_type"]
+            if ev.get("tool_results") and not meta["tool_results"]:
+                meta["tool_results"] = ev["tool_results"]
+            if ev.get("adapter_mode") and not meta["adapter_mode"]:
+                meta["adapter_mode"] = ev["adapter_mode"]
+            if ev.get("tool_args") and not meta["tool_args"]:
+                meta["tool_args"] = ev["tool_args"]
+        if text:
+            return True, text, meta, None
+        # error 이벤트 확인
+        for ev in events:
+            if ev.get("status") == "error":
+                return False, "", meta, ev.get("error", "unknown error")
+        return False, "", meta, f"SSE 이벤트 수신했으나 text/awaiting 없음 (events={len(events)})"
+    except Exception as sse_exc:
+        logger.warning("SSE stream failed: %s — falling back to REST", sse_exc)
+    # --- REST fallback: /v2/agent/run ---
+    try:
+        status_code, resp = await http_post("/v2/agent/run", body, timeout=timeout)
+        if status_code != 200:
+            return False, "", meta, f"REST HTTP {status_code}: {resp}"
+        if resp.get("planned_tools"):
+            meta["planned_tools"] = resp["planned_tools"]
+        if resp.get("task_type"):
+            meta["task_type"] = resp["task_type"]
+        if resp.get("adapter_mode"):
+            meta["adapter_mode"] = resp["adapter_mode"]
+        if resp.get("tool_args"):
+            meta["tool_args"] = resp["tool_args"]
+        if resp.get("status") == "awaiting_approval":
+            thread_id = resp.get("thread_id") or session_id
+            approve_code, approve_resp = await http_post(
+                f"/v2/agent/approve?thread_id={thread_id}&approved={'true' if approve else 'false'}",
+                {},
+                timeout=timeout,
+            )
+            if approve_code != 200:
+                return False, "", meta, f"approve HTTP {approve_code}"
+            final_text = approve_resp.get("text", "") or approve_resp.get("final_text", "") or ""
+            if approve_resp.get("tool_results"):
+                meta["tool_results"] = approve_resp["tool_results"]
+            if approve_resp.get("status") == "rejected":
+                return True, final_text, meta, None
+            if final_text:
+                return True, final_text, meta, None
+            return False, "", meta, "approve 200 but text 없음"
+        if resp.get("status") == "error":
+            return False, "", meta, resp.get("error", "agent run error")
+        text = resp.get("text", "") or resp.get("final_text", "")
+        if resp.get("tool_results"):
+            meta["tool_results"] = resp["tool_results"]
+        if text:
+            return True, text, meta, None
+        return False, "", meta, f"text 없음, status={resp.get('status')}"
+    except Exception as exc:
+        return False, "", meta, str(exc)
+# ---------------------------------------------------------------------------
+# Phase 1: Infrastructure (hard gate)
+# ---------------------------------------------------------------------------
+async def scenario1_health_profile() -> dict:
+    """Scenario 1: Health & Profile (retry 3x with backoff)."""
+    backoffs = [5, 10, 20]
+    attempts = 0
+    last_error = ""
+    for attempt_idx in range(3):
+        attempts += 1
+        t0 = time.monotonic()
+        try:
+            status_code, body = await http_get("/health", timeout=10)
+            elapsed = time.monotonic() - t0
+            assertions = []
+            if status_code != 200:
+                last_error = f"HTTP {status_code}"
+                if attempt_idx < 2:
+                    await asyncio.sleep(backoffs[attempt_idx])
+                    continue
+                return _record(
+                    1,
+                    "Health & Profile",
+                    1,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=["HTTP 200"],
+                    error=last_error,
+                    detail={"body": body},
+                )
+            assertions.append("HTTP 200: OK")
+            srv_status = body.get("status", "")
+            if srv_status not in ("ok", "healthy"):
+                last_error = f"status={srv_status!r}, expected ok/healthy"
+                if attempt_idx < 2:
+                    await asyncio.sleep(backoffs[attempt_idx])
+                    continue
+                return _record(
+                    1,
+                    "Health & Profile",
+                    1,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    error=last_error,
+                    detail={"body": body},
+                )
+            assertions.append(f"status={srv_status}: OK")
+            warnings = []
+            if "model" not in body:
+                warnings.append("model field not found in /health")
+            else:
+                assertions.append(f"model={body['model']}: OK")
+            if "profile" not in body:
+                warnings.append("profile field not found in /health")
+            else:
+                assertions.append(f"profile={body['profile']}: OK")
+            return _record(
+                1,
+                "Health & Profile",
+                1,
+                "passed",
+                elapsed,
+                attempts,
+                assertions=assertions,
+                warnings=warnings,
+                detail={
+                    "status": srv_status,
+                    "model": body.get("model"),
+                    "profile": body.get("profile"),
+                },
+            )
+        except Exception as exc:
+            last_error = str(exc)
+            if attempt_idx < 2:
+                await asyncio.sleep(backoffs[attempt_idx])
+                continue
+            return _record(
+                1,
+                "Health & Profile",
+                1,
+                "failed",
+                time.monotonic() - t0,
+                attempts,
+                error=last_error,
+            )
+    return _record(1, "Health & Profile", 1, "failed", 0, attempts, error=last_error)
+async def scenario2_base_model_generation() -> dict:
+    """Scenario 2: Base Model Generation (retry 2x)."""
+    body_completions = {
+        "model": BASE_MODEL,
+        "prompt": "대한민국의 수도는",
+        "max_tokens": 32,
+        "temperature": 0.0,
+    }
+    last_error = ""
+    attempts = 0
+    for attempt_idx in range(2):
+        attempts += 1
+        t0 = time.monotonic()
+        try:
+            status_code, resp = await http_post("/v1/completions", body_completions, timeout=60)
+            elapsed = time.monotonic() - t0
+            if status_code == 200:
+                choices = resp.get("choices", [])
+                if choices and choices[0].get("text") is not None:
+                    text = choices[0]["text"]
+                    if text.strip():
+                        return _record(
+                            2,
+                            "Base Model Generation",
+                            1,
+                            "passed",
+                            elapsed,
+                            attempts,
+                            assertions=["HTTP 200", "non-empty text"],
+                            detail={"endpoint": "/v1/completions", "text_preview": text[:100]},
+                        )
+            # fallback /v1/generate
+            body_legacy = {
+                "prompt": "대한민국의 수도는",
+                "max_tokens": 32,
+                "temperature": 0.0,
+                "use_rag": False,
+            }
+            status_code2, resp2 = await http_post("/v1/generate", body_legacy, timeout=60)
+            elapsed2 = time.monotonic() - t0
+            if status_code2 == 200 and resp2.get("text", "").strip():
+                return _record(
+                    2,
+                    "Base Model Generation",
+                    1,
+                    "passed",
+                    elapsed2,
+                    attempts,
+                    assertions=["HTTP 200 (fallback)", "non-empty text"],
+                    detail={"endpoint": "/v1/generate", "text_preview": resp2["text"][:100]},
+                )
+            last_error = f"/v1/completions HTTP {status_code}, /v1/generate HTTP {status_code2}"
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(
+        2, "Base Model Generation", 1, "failed", time.monotonic() - t0, attempts, error=last_error
+    )
+async def scenario3_adapter_registry() -> dict:
+    """Scenario 3: Adapter Registry via /v1/models."""
+    t0 = time.monotonic()
+    try:
+        status_code, resp = await http_get("/v1/models", timeout=10)
+        elapsed = time.monotonic() - t0
+        assertions = []
+        warnings = []
+        if status_code != 200:
+            return _record(
+                3,
+                "Adapter Registry",
+                1,
+                "failed",
+                elapsed,
+                assertions=["HTTP 200"],
+                error=f"HTTP {status_code}",
+                detail={"resp": resp},
+            )
+        assertions.append("HTTP 200: OK")
+        data = resp.get("data", [])
+        if not isinstance(data, list):
+            return _record(
+                3,
+                "Adapter Registry",
+                1,
+                "failed",
+                elapsed,
+                assertions=assertions,
+                error="data array missing or invalid",
+                detail={"resp": resp},
+            )
+        assertions.append(f"data array: {len(data)} models")
+        model_ids = [m.get("id", "") for m in data]
+        civil_found = any("civil" in mid for mid in model_ids)
+        legal_found = any("legal" in mid for mid in model_ids)
+        if not civil_found:
+            warnings.append("civil adapter not detected in /v1/models (WARN, not FAIL)")
+        else:
+            assertions.append("civil adapter detected")
+        if not legal_found:
+            warnings.append("legal adapter not detected in /v1/models (WARN, not FAIL)")
+        else:
+            assertions.append("legal adapter detected")
+        return _record(
+            3,
+            "Adapter Registry",
+            1,
+            "passed",
+            elapsed,
+            assertions=assertions,
+            warnings=warnings,
+            detail={"model_ids": model_ids, "civil_found": civil_found, "legal_found": legal_found},
+        )
+    except Exception as exc:
+        return _record(3, "Adapter Registry", 1, "failed", time.monotonic() - t0, error=str(exc))
+# ---------------------------------------------------------------------------
+# Phase 2: Agent Pipeline Core
+# ---------------------------------------------------------------------------
+# Session shared between Scenario 5 and Scenario 6.
+# Scenario 5 stores its session id here when it passes; Scenario 6 reuses it.
+_scenario5_session_id: Optional[str] = None
+# Set to True by Scenario 5 on success; Scenario 6 is skipped while it is False.
+_scenario5_passed: bool = False
+async def scenario4_planner_valid_plan() -> dict:
+    """Scenario 4: Planner Produces Valid Plan (retry 2x)."""
+    query = "서울시 도로 파손 민원에 대한 답변 초안을 작성해주세요"
+    last_error = ""
+    attempts = 0
+    for attempt_idx in range(2):
+        attempts += 1
+        t0 = time.monotonic()
+        try:
+            sid = _session_id(4)
+            ok, text, meta, err = await _call_agent_with_approval(
+                query, sid, approve=True, timeout=120
+            )
+            elapsed = time.monotonic() - t0
+            planned = meta.get("planned_tools", [])
+            if planned:
+                _observed_tools.update(planned)
+            assertions = []
+            if not planned:
+                last_error = err or "planned_tools 비어있음"
+                if attempt_idx < 1:
+                    continue
+                return _record(
+                    4,
+                    "Planner Produces Valid Plan",
+                    2,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=["planned_tools non-empty"],
+                    error=last_error,
+                    detail={"meta": meta},
+                )
+            assertions.append(f"planned_tools: {planned}")
+            invalid = [t for t in planned if t not in VALID_TOOLS]
+            if invalid:
+                last_error = f"invalid tools: {invalid}"
+                if attempt_idx < 1:
+                    continue
+                return _record(
+                    4,
+                    "Planner Produces Valid Plan",
+                    2,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    error=last_error,
+                    detail={"invalid_tools": invalid, "valid": list(VALID_TOOLS)},
+                )
+            assertions.append("all tools in VALID_TOOLS whitelist")
+            return _record(
+                4,
+                "Planner Produces Valid Plan",
+                2,
+                "passed",
+                elapsed,
+                attempts,
+                assertions=assertions,
+                detail={"planned_tools": planned, "meta": meta},
+            )
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(4, "Planner Produces Valid Plan", 2, "failed", 0, attempts, error=last_error)
+async def scenario5_civil_lora_draft() -> dict:
+    """Scenario 5: Civil LoRA Draft Response (retry 2x)."""
+    global _scenario5_session_id, _scenario5_passed
+    query = "아파트 층간소음 민원에 대한 답변을 작성해주세요"
+    last_error = ""
+    attempts = 0
+    for attempt_idx in range(2):
+        attempts += 1
+        t0 = time.monotonic()
+        try:
+            sid = _session_id(5)
+            ok, text, meta, err = await _call_agent_with_approval(
+                query, sid, approve=True, timeout=180
+            )
+            elapsed = time.monotonic() - t0
+            planned = meta.get("planned_tools", [])
+            if planned:
+                _observed_tools.update(planned)
+            assertions = []
+            if not ok:
+                last_error = err or "agent call failed"
+                if attempt_idx < 1:
+                    continue
+                return _record(
+                    5,
+                    "Civil LoRA Draft Response",
+                    2,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    error=last_error,
+                    detail={"meta": meta},
+                )
+            has_draft = "draft_civil_response" in planned
+            if has_draft:
+                assertions.append("draft_civil_response in planned_tools")
+            else:
+                assertions.append(f"draft_civil_response NOT in planned_tools ({planned})")
+            if len(text) >= 50:
+                assertions.append(f"text length {len(text)} >= 50")
+            else:
+                assertions.append(f"text length {len(text)} < 50 (FAIL)")
+            task_type = meta.get("task_type")
+            if task_type == "draft_response":
+                assertions.append("task_type=draft_response")
+            else:
+                assertions.append(f"task_type={task_type} (expected draft_response)")
+            # 핵심 검증: text >= 50 이면 PASS (planned_tools와 task_type은 soft 검증)
+            passed = len(text) >= 50
+            if passed:
+                _scenario5_session_id = sid
+                _scenario5_passed = True
+            warnings = []
+            if not has_draft:
+                warnings.append("draft_civil_response not in planned_tools")
+            if task_type != "draft_response":
+                warnings.append(f"task_type={task_type}, expected draft_response")
+            if passed:
+                return _record(
+                    5,
+                    "Civil LoRA Draft Response",
+                    2,
+                    "passed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    warnings=warnings,
+                    detail={"text_preview": text[:200], "meta": meta},
+                )
+            last_error = "text < 50 chars"
+            if attempt_idx < 1:
+                continue
+            return _record(
+                5,
+                "Civil LoRA Draft Response",
+                2,
+                "failed",
+                elapsed,
+                attempts,
+                assertions=assertions,
+                warnings=warnings,
+                error=last_error,
+                detail={"text_preview": text[:200], "meta": meta},
+            )
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(5, "Civil LoRA Draft Response", 2, "failed", 0, attempts, error=last_error)
+async def scenario6_legal_lora_evidence() -> dict:
+    """Scenario 6: Legal LoRA Evidence Augmentation (depends on Scenario 5)."""
+    if not _scenario5_passed:
+        return _record(
+            6,
+            "Legal LoRA Evidence Augmentation",
+            2,
+            "skipped",
+            0,
+            error="Scenario 5 failed — dependency skip",
+        )
+    query = "위 답변에 관련 법령과 판례 근거를 추가해주세요"
+    last_error = ""
+    attempts = 0
+    for attempt_idx in range(2):
+        attempts += 1
+        t0 = time.monotonic()
+        try:
+            ok, text, meta, err = await _call_agent_with_approval(
+                query, _scenario5_session_id, approve=True, timeout=180
+            )
+            elapsed = time.monotonic() - t0
+            planned = meta.get("planned_tools", [])
+            if planned:
+                _observed_tools.update(planned)
+            assertions = []
+            if not ok:
+                last_error = err or "agent call failed"
+                if attempt_idx < 1:
+                    continue
+                return _record(
+                    6,
+                    "Legal LoRA Evidence Augmentation",
+                    2,
+                    "failed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    error=last_error,
+                    detail={"meta": meta},
+                )
+            has_evidence = "append_evidence" in planned
+            if has_evidence:
+                assertions.append("append_evidence in planned_tools")
+            else:
+                assertions.append(f"append_evidence NOT in planned_tools ({planned})")
+            has_legal = _contains_legal_keyword(text)
+            matched = [p for p in LEGAL_PATTERNS if re.search(p, text)]
+            if has_legal:
+                assertions.append(f"legal patterns found: {matched[:3]}")
+            else:
+                assertions.append("no legal patterns found (FAIL)")
+            warnings = []
+            if not has_evidence:
+                warnings.append("append_evidence not in planned_tools")
+            if has_legal:
+                return _record(
+                    6,
+                    "Legal LoRA Evidence Augmentation",
+                    2,
+                    "passed",
+                    elapsed,
+                    attempts,
+                    assertions=assertions,
+                    warnings=warnings,
+                    detail={"text_preview": text[:300], "matched_patterns": matched, "meta": meta},
+                )
+            last_error = "legal pattern not found in response"
+            if attempt_idx < 1:
+                continue
+            return _record(
+                6,
+                "Legal LoRA Evidence Augmentation",
+                2,
+                "failed",
+                elapsed,
+                attempts,
+                assertions=assertions,
+                warnings=warnings,
+                error=last_error,
+                detail={"text_preview": text[:300], "meta": meta},
+            )
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(
+        6, "Legal LoRA Evidence Augmentation", 2, "failed", 0, attempts, error=last_error
+    )
+async def scenario7_task_type_classification() -> dict:
+    """Scenario 7: Task Type Classification (at least 2/3 correct)."""
+    test_cases = [
+        ("민원 답변 초안을 작성해줘", {"draft_response"}),
+        ("관련 통계 데이터를 조회해줘", {"stats_query", "lookup_stats"}),
+        ("이 민원의 근거를 보강해줘", {"append_evidence"}),
+    ]
+    t0 = time.monotonic()
+    correct = 0
+    sub_results = []
+    for query, expected_types in test_cases:
+        try:
+            sid = _session_id(7)
+            ok, text, meta, err = await _call_agent_with_approval(
+                query, sid, approve=True, timeout=180
+            )
+            planned = meta.get("planned_tools", [])
+            if planned:
+                _observed_tools.update(planned)
+            actual_type = meta.get("task_type")
+            matched = actual_type in expected_types if actual_type else False
+            if matched:
+                correct += 1
+            sub_results.append(
+                {
+                    "query": query[:30],
+                    "expected": list(expected_types),
+                    "actual": actual_type,
+                    "matched": matched,
+                    "ok": ok,
+                    "error": err,
+                }
+            )
+        except Exception as exc:
+            sub_results.append(
+                {
+                    "query": query[:30],
+                    "expected": list(expected_types),
+                    "actual": None,
+                    "matched": False,
+                    "error": str(exc),
+                }
+            )
+    elapsed = time.monotonic() - t0
+    assertions = [f"{correct}/3 task types correct (need >= 2)"]
+    if correct >= 2:
+        return _record(
+            7,
+            "Task Type Classification",
+            2,
+            "passed",
+            elapsed,
+            assertions=assertions,
+            detail={"sub_results": sub_results, "correct": correct},
+        )
+    return _record(
+        7,
+        "Task Type Classification",
+        2,
+        "failed",
+        elapsed,
+        assertions=assertions,
+        error=f"only {correct}/3 correct (need >= 2)",
+        detail={"sub_results": sub_results},
+    )
+# ---------------------------------------------------------------------------
+# Phase 3: data.go.kr API Tools (soft gate)
+# ---------------------------------------------------------------------------
+_datago_available: bool = False
+async def _check_datago_connectivity() -> bool:
+    """data.go.kr 연결 확인 preflight."""
+    global _datago_available
+    try:
+        code, _ = await http_get_raw("https://www.data.go.kr", timeout=10)
+        _datago_available = code in (200, 301, 302, 403)
+        return _datago_available
+    except Exception:
+        _datago_available = False
+        return False
+async def scenario8_external_api_tools() -> dict:
+    """Scenario 8: External API Tool Invocation (4 sub-cases, accept 3/4)."""
+    if not _datago_available:
+        return _record(
+            8,
+            "External API Tool Invocation",
+            3,
+            "skipped",
+            0,
+            error="data.go.kr unreachable — Phase 3 skipped",
+        )
+    sub_cases = [
+        ("8a", "최근 도로 관련 민원 이슈를 분석해줘", "issue_detector"),
+        ("8b", "서울시 민원 통계를 조회해줘", "stats_lookup"),
+        ("8c", "도로 관련 키워드 트렌드를 분석해줘", "keyword_analyzer"),
+        ("8d", "서울시 강남구 민원 인구통계를 조회해줘", "demographics_lookup"),
+    ]
+    t0 = time.monotonic()
+    sub_passed = 0
+    sub_results = []
+    for label, query, expected_tool in sub_cases:
+        for attempt_idx in range(2):  # retry 1x
+            try:
+                sid = _session_id(8)
+                ok, text, meta, err = await _call_agent_with_approval(
+                    query, sid, approve=True, timeout=180
+                )
+                planned = meta.get("planned_tools", [])
+                if planned:
+                    _observed_tools.update(planned)
+                tool_in_plan = expected_tool in planned
+                tool_results = meta.get("tool_results", {})
+                tool_in_results = expected_tool in tool_results
+                passed = tool_in_plan  # tool in planned_tools suffices
+                if passed:
+                    sub_passed += 1
+                sub_results.append(
+                    {
+                        "label": label,
+                        "expected_tool": expected_tool,
+                        "tool_in_plan": tool_in_plan,
+                        "tool_in_results": tool_in_results,
+                        "planned_tools": planned,
+                        "passed": passed,
+                        "attempt": attempt_idx + 1,
+                        "error": err,
+                    }
+                )
+                break  # no retry needed if we got a response
+            except Exception as exc:
+                if attempt_idx == 1:
+                    sub_results.append(
+                        {
+                            "label": label,
+                            "expected_tool": expected_tool,
+                            "passed": False,
+                            "error": str(exc),
+                            "attempt": attempt_idx + 1,
+                        }
+                    )
+    elapsed = time.monotonic() - t0
+    assertions = [f"{sub_passed}/4 sub-cases passed (need >= 3)"]
+    if sub_passed >= 3:
+        return _record(
+            8,
+            "External API Tool Invocation",
+            3,
+            "passed",
+            elapsed,
+            assertions=assertions,
+            detail={"sub_results": sub_results},
+        )
+    return _record(
+        8,
+        "External API Tool Invocation",
+        3,
+        "failed",
+        elapsed,
+        assertions=assertions,
+        error=f"only {sub_passed}/4 passed (need >= 3)",
+        detail={"sub_results": sub_results},
+    )
+# ---------------------------------------------------------------------------
+# Phase 4: Adapter Dynamics
+# ---------------------------------------------------------------------------
+async def scenario9_sequential_adapter_switching() -> dict:
+    """Scenario 9: Sequential Adapter Switching (3 iterations, 3 requests each)."""
+    t0 = time.monotonic()
+    errors: list[str] = []
+    total_requests = 0
+    for i in range(1, 4):
+        sid = _session_id(9)
+        # Civil query
+        ok1, text1, meta1, err1 = await _call_agent_with_approval(
+            "주차 위반 과태료 이의신청 민원 답변을 작성해줘", sid, approve=True, timeout=180
+        )
+        total_requests += 1
+        if meta1.get("planned_tools"):
+            _observed_tools.update(meta1["planned_tools"])
+        if not ok1 or not text1.strip():
+            errors.append(f"iter {i} civil-1: {err1 or '빈 응답'}")
+            continue
+        # Legal query (same session)
+        ok2, text2, meta2, err2 = await _call_agent_with_approval(
+            "위 답변에 관련 법령 근거를 추가해줘", sid, approve=True, timeout=180
+        )
+        total_requests += 1
+        if meta2.get("planned_tools"):
+            _observed_tools.update(meta2["planned_tools"])
+        if not ok2 or not text2.strip():
+            errors.append(f"iter {i} legal: {err2 or '빈 응답'}")
+            continue
+        # Civil query again (same session)
+        ok3, text3, meta3, err3 = await _call_agent_with_approval(
+            "추가 민원 답변 초안을 작성해줘", sid, approve=True, timeout=180
+        )
+        total_requests += 1
+        if meta3.get("planned_tools"):
+            _observed_tools.update(meta3["planned_tools"])
+        if not ok3 or not text3.strip():
+            errors.append(f"iter {i} civil-2: {err3 or '빈 응답'}")
+    elapsed = time.monotonic() - t0
+    assertions = [f"{total_requests} requests completed", f"{len(errors)} errors"]
+    if errors:
+        return _record(
+            9,
+            "Sequential Adapter Switching",
+            4,
+            "failed",
+            elapsed,
+            assertions=assertions,
+            error="; ".join(errors[:3]),
+            detail={"iterations": 3, "total_requests": total_requests, "errors": errors},
+        )
+    return _record(
+        9,
+        "Sequential Adapter Switching",
+        4,
+        "passed",
+        elapsed,
+        assertions=assertions,
+        detail={"iterations": 3, "total_requests": total_requests, "all_passed": True},
+    )
+async def scenario10_lora_id_consistency() -> dict:
+    """Scenario 10: LoRA ID Consistency (informational, always PASS)."""
+    t0 = time.monotonic()
+    try:
+        _, resp_before = await http_get("/v1/models", timeout=10)
+        models_before = [m.get("id", "") for m in resp_before.get("data", [])]
+        # Scenario 9 이미 완료된 상태에서 다시 확인
+        _, resp_after = await http_get("/v1/models", timeout=10)
+        models_after = [m.get("id", "") for m in resp_after.get("data", [])]
+        elapsed = time.monotonic() - t0
+        stable = set(models_before) == set(models_after)
+        assertions = [
+            f"before: {len(models_before)} models",
+            f"after: {len(models_after)} models",
+            f"stable: {stable}",
+        ]
+        warnings = [] if stable else ["adapter list changed between checks"]
+        return _record(
+            10,
+            "LoRA ID Consistency",
+            4,
+            "passed",
+            elapsed,
+            assertions=assertions,
+            warnings=warnings,
+            detail={"models_before": models_before, "models_after": models_after, "stable": stable},
+        )
+    except Exception as exc:
+        return _record(
+            10,
+            "LoRA ID Consistency",
+            4,
+            "passed",
+            time.monotonic() - t0,
+            assertions=["informational check"],
+            warnings=[f"could not verify: {exc}"],
+        )
+# ---------------------------------------------------------------------------
+# Phase 5: Robustness
+# ---------------------------------------------------------------------------
+async def scenario11_empty_query() -> dict:
+    """Scenario 11: Empty Query Handling (expect 422, NOT 500)."""
+    t0 = time.monotonic()
+    assertions = []
+    last_error = ""
+    for attempt_idx in range(2):
+        try:
+            # REST endpoint
+            code_rest, resp_rest = await http_post("/v2/agent/run", {"query": ""}, timeout=10)
+            assertions.append(f"/v2/agent/run empty query: HTTP {code_rest}")
+            # SSE endpoint
+            code_sse, events_sse = await http_post_sse(
+                "/v2/agent/stream", {"query": ""}, timeout=10
+            )
+            assertions.append(f"/v2/agent/stream empty query: HTTP {code_sse}")
+            elapsed = time.monotonic() - t0
+            # 422 (Pydantic validation) 또는 400 (Bad Request) 허용, 500은 불가
+            rest_ok = code_rest in (400, 422)
+            sse_ok = code_sse in (400, 422)
+            no_500 = code_rest != 500 and code_sse != 500
+            if no_500 and (rest_ok or sse_ok):
+                return _record(
+                    11,
+                    "Empty Query Handling",
+                    5,
+                    "passed",
+                    elapsed,
+                    attempt_idx + 1,
+                    assertions=assertions,
+                    detail={"rest_code": code_rest, "sse_code": code_sse},
+                )
+            if not no_500:
+                last_error = f"got 500 (rest={code_rest}, sse={code_sse})"
+            else:
+                last_error = f"unexpected codes: rest={code_rest}, sse={code_sse}"
+            if attempt_idx < 1:
+                continue
+            return _record(
+                11,
+                "Empty Query Handling",
+                5,
+                "failed",
+                elapsed,
+                attempt_idx + 1,
+                assertions=assertions,
+                error=last_error,
+                detail={"rest_code": code_rest, "sse_code": code_sse},
+            )
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(
+        11, "Empty Query Handling", 5, "failed", time.monotonic() - t0, 2, error=last_error
+    )
+async def scenario12_reject_flow() -> dict:
+    """Scenario 12: Reject Flow Completeness."""
+    last_error = ""
+    for attempt_idx in range(2):
+        t0 = time.monotonic()
+        try:
+            sid = _session_id(12)
+            ok, text, meta, err = await _call_agent_with_approval(
+                "민원 답변을 작성해주세요", sid, approve=False, timeout=30
+            )
+            elapsed = time.monotonic() - t0
+            assertions = []
+            # reject 후에는 tool_results가 비어있어야 함
+            tool_results = meta.get("tool_results", {})
+            if ok:
+                assertions.append("reject flow completed")
+                if not tool_results:
+                    assertions.append("tool_results empty after reject")
+                else:
+                    assertions.append(f"tool_results NOT empty: {list(tool_results.keys())}")
+                if elapsed < 5:
+                    assertions.append(f"response < 5s ({elapsed:.1f}s)")
+                else:
+                    assertions.append(f"response >= 5s ({elapsed:.1f}s)")
+                return _record(
+                    12,
+                    "Reject Flow Completeness",
+                    5,
+                    "passed",
+                    elapsed,
+                    attempt_idx + 1,
+                    assertions=assertions,
+                    detail={"text_preview": text[:100], "tool_results": tool_results, "meta": meta},
+                )
+            last_error = err or "reject flow failed"
+            if attempt_idx < 1:
+                continue
+            return _record(
+                12,
+                "Reject Flow Completeness",
+                5,
+                "failed",
+                elapsed,
+                attempt_idx + 1,
+                assertions=assertions,
+                error=last_error,
+                detail={"meta": meta},
+            )
+        except Exception as exc:
+            last_error = str(exc)
+    return _record(
+        12, "Reject Flow Completeness", 5, "failed", time.monotonic() - t0, 2, error=last_error
+    )
+async def scenario13_concurrent_isolation() -> dict:
+    """Scenario 13: Concurrent Request Isolation (3 simultaneous requests)."""
+    t0 = time.monotonic()
+    queries = [
+        ("주차 위반 민원 답변 초안을 작성해줘", _session_id(13)),
+        ("소음 민원에 대한 답변을 작성해줘", _session_id(13)),
+        ("도로 파손 민원 답변을 작성해줘", _session_id(13)),
+    ]
+    async def _run_one(query: str, sid: str) -> dict:
+        try:
+            ok, text, meta, err = await _call_agent_with_approval(
+                query, sid, approve=True, timeout=300
+            )
+            if meta.get("planned_tools"):
+                _observed_tools.update(meta["planned_tools"])
+            return {
+                "session_id": sid,
+                "ok": ok,
+                "text_len": len(text),
+                "error": err,
+                "query": query[:20],
+            }
+        except Exception as exc:
+            return {
+                "session_id": sid,
+                "ok": False,
+                "text_len": 0,
+                "error": str(exc),
+                "query": query[:20],
+            }
+    tasks = [_run_one(q, s) for q, s in queries]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    elapsed = time.monotonic() - t0
+    # 결과 정리
+    sub_results = []
+    valid_count = 0
+    for r in results:
+        if isinstance(r, Exception):
+            sub_results.append({"ok": False, "error": str(r)})
+        else:
+            sub_results.append(r)
+            if r.get("ok"):
+                valid_count += 1
+    # session_id 교차 오염 확인 (여기서는 각각 독립적 session_id)
+    session_ids = [q[1] for q in queries]
+    all_unique = len(set(session_ids)) == len(session_ids)
+    assertions = [
+        f"{valid_count}/3 concurrent requests succeeded",
+        f"session_ids unique: {all_unique}",
+    ]
+    if valid_count == 3:
+        return _record(
+            13,
+            "Concurrent Request Isolation",
+            5,
+            "passed",
+            elapsed,
+            assertions=assertions,
+            detail={"sub_results": sub_results},
+        )
+    return _record(
+        13,
+        "Concurrent Request Isolation",
+        5,
+        "failed",
+        elapsed,
+        assertions=assertions,
+        error=f"only {valid_count}/3 succeeded",
+        detail={"sub_results": sub_results},
+    )
+# ---------------------------------------------------------------------------
+# Cold Start 대기
+# ---------------------------------------------------------------------------
+async def _wait_cold_start() -> float:
+    """서버 cold start 대기. 최대 10회 x 30초 간격. 대기한 총 시간을 반환."""
+    total_wait = 0.0
+    for i in range(10):
+        try:
+            code, body = await http_get("/health", timeout=10)
+            if code == 200 and body.get("status") in ("ok", "healthy"):
+                print(f"  서버 준비 완료 (대기 {total_wait:.0f}s)")
+                return total_wait
+        except Exception:
+            pass
+        if i < 9:
+            print(f"  서버 대기 중... ({i + 1}/10, 30s 후 재시도)")
+            await asyncio.sleep(30)
+            total_wait += 30
+    print("  [WARN] 서버 준비 확인 실패 — 계속 진행")
+    return total_wait
+# ---------------------------------------------------------------------------
+# 메인 러너
+# ---------------------------------------------------------------------------
+async def main() -> int:
+    print("=" * 60)
+    print("GovOn E2E Tool Calling + AdapterRegistry 검증")
+    print("=" * 60)
+    print(f"  대상 서버: {BASE_URL}")
+    print(f"  인증: {'API_KEY 설정됨' if API_KEY else '미설정 (비인증)'}")
+    print(f"  HTTP 백엔드: {_HTTP_BACKEND}")
+    print(f"  타임아웃: {TIMEOUT}s / 시나리오")
+    print(f"  run_id: {_run_id}")
+    print("-" * 60)
+    # Cold start 대기
+    print("[Cold Start] 서버 준비 확인 중...")
+    cold_start_wait = await _wait_cold_start()
+    # ===== Phase 1: Infrastructure (hard gate) =====
+    print("\n[Phase 1] Infrastructure (hard gate)")
+    print("-" * 40)
+    phase1_scenarios = [
+        scenario1_health_profile,
+        scenario2_base_model_generation,
+        scenario3_adapter_registry,
+    ]
+    phase1_failed = False
+    for fn in phase1_scenarios:
+        result = await fn()
+        if result["status"] == "failed":
+            phase1_failed = True
+    if phase1_failed:
+        print("\n" + "!" * 60)
+        print("ABORT: Infrastructure not ready — Phase 1 failed")
+        print("!" * 60)
+        _write_output(cold_start_wait)
+        return 1
+    # ===== Phase 2: Agent Pipeline Core =====
+    print("\n[Phase 2] Agent Pipeline Core")
+    print("-" * 40)
+    phase2_scenarios = [
+        scenario4_planner_valid_plan,
+        scenario5_civil_lora_draft,
+        scenario6_legal_lora_evidence,
+        scenario7_task_type_classification,
+    ]
+    for fn in phase2_scenarios:
+        await fn()
+    # ===== Phase 3: data.go.kr API Tools (soft gate) =====
+    print("\n[Phase 3] data.go.kr API Tools (soft gate)")
+    print("-" * 40)
+    print("  data.go.kr 연결 확인...")
+    datago_ok = await _check_datago_connectivity()
+    if datago_ok:
+        print("  data.go.kr 연결 가능")
+    else:
+        print("  data.go.kr 연결 불가 — Phase 3 스킵")
+    await scenario8_external_api_tools()
+    # ===== Phase 4: Adapter Dynamics =====
+    print("\n[Phase 4] Adapter Dynamics")
+    print("-" * 40)
+    await scenario9_sequential_adapter_switching()
+    await scenario10_lora_id_consistency()
+    # ===== Phase 5: Robustness =====
+    print("\n[Phase 5] Robustness")
+    print("-" * 40)
+    phase5_scenarios = [
+        scenario11_empty_query,
+        scenario12_reject_flow,
+        scenario13_concurrent_isolation,
+    ]
+    for fn in phase5_scenarios:
+        await fn()
+    # ===== 요약 =====
+    print("\n" + "=" * 60)
+    passed = sum(1 for r in _results if r["status"] == "passed")
+    failed = sum(1 for r in _results if r["status"] == "failed")
+    skipped = sum(1 for r in _results if r["status"] == "skipped")
+    total = len(_results)
+    print(f"결과: {passed}/{total} 통과, {failed} 실패, {skipped} 스킵")
+    tool_ratio = len(_observed_tools) / len(VALID_TOOLS) if VALID_TOOLS else 0
+    print(f"도구 커버리지: {len(_observed_tools)}/{len(VALID_TOOLS)} ({tool_ratio:.0%})")
+    if _observed_tools:
+        print(f"  관측된 도구: {sorted(_observed_tools)}")
+    _write_output(cold_start_wait)
+    return 0 if failed == 0 else 1
+def _write_output(cold_start_wait: float) -> None:
+    """JSON 결과 파일 출력."""
+    from datetime import datetime, timezone
+    passed = sum(1 for r in _results if r["status"] == "passed")
+    failed = sum(1 for r in _results if r["status"] == "failed")
+    skipped = sum(1 for r in _results if r["status"] == "skipped")
+    tool_ratio = len(_observed_tools) / len(VALID_TOOLS) if VALID_TOOLS else 0
+    output = {
+        "meta": {
+            "run_id": _run_id,
+            "timestamp_utc": datetime.now(timezone.utc).isoformat(),
+            "target_url": BASE_URL,
+            "cold_start_wait_seconds": cold_start_wait,
+        },
+        "summary": {
+            "total": len(_results),
+            "passed": passed,
+            "failed": failed,
+            "skipped": skipped,
+            "tool_coverage": {
+                "observed": sorted(_observed_tools),
+                "ratio": round(tool_ratio, 2),
+            },
+        },
+        "scenarios": _results,
+        "server_url": BASE_URL,
+        "http_backend": _HTTP_BACKEND,
+    }
+    with open(RESULTS_PATH, "w", encoding="utf-8") as f:
+        json.dump(output, f, ensure_ascii=False, indent=2)
+    print(f"\n결과 저장: {RESULTS_PATH}")
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    sys.exit(exit_code)

scripts/verify_lora_serving.py ADDED Viewed

	@@ -0,0 +1,663 @@

+#!/usr/bin/env python3
+"""GovOn Legal LoRA 어댑터 서빙 통합 검증 스크립트.
+HuggingFace Space에 배포된 govon-runtime 서버에 대해
+legal/civil adapter Multi-LoRA 서빙 동작을 검증한다.
+사용법:
+    GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_lora_serving.py
+    GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_lora_serving.py
+엔드포인트 참고 (src/inference/api_server.py):
+    GET  /health              — 서버 상태 확인 (status: "healthy")
+    POST /v1/completions      — OpenAI-compatible (vLLM 직접 제공)
+    POST /v1/generate         — GovOn 레거시 생성 엔드포인트
+    POST /v2/agent/run        — LangGraph agent (REST, interrupt까지 실행)
+    POST /v2/agent/stream     — LangGraph agent (SSE 스트리밍)
+    GET  /v1/models           — OpenAI-compatible 모델 목록 (vLLM 직접 제공)
+AgentRunRequest 필드:
+    query: str          — 사용자 입력 (필수)
+    session_id: str     — 세션 식별자 (선택)
+    stream: bool        — 스트리밍 여부 (기본값 False)
+    force_tools: list   — 강제 실행 도구 목록 (선택)
+    max_tokens: int     — 최대 토큰 수 (기본값 512)
+    temperature: float  — 온도 (기본값 0.7)
+    use_rag: bool       — RAG 사용 여부 (기본값 True)
+"""
+# stdlib
+import asyncio
+import json
+import logging
+import os
+import re
+import sys
+import time
+from typing import Any, Optional
+from uuid import uuid4
+BASE_URL = os.environ.get("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
+API_KEY = os.environ.get("API_KEY")
+TIMEOUT = 300  # maximum wait per scenario (seconds)
+BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
+RESULTS_PATH = "verify_results.json"
+logger = logging.getLogger(__name__)
+# Legal-citation patterns (for Scenario 4 verification) — regex-based, single characters excluded
+LEGAL_PATTERNS = [
+    r"제\s*\d+\s*조",
+    r"제\s*\d+\s*항",
+    r"법률",
+    r"시행령",
+    r"조례",
+    r"판례",
+    r"대법원",
+]
+_results: list[dict] = []
+# ---------------------------------------------------------------------------
+# HTTP 클라이언트 레이어 (httpx 우선, urllib fallback)
+# ---------------------------------------------------------------------------
+try:
+    import httpx
+    _HTTP_BACKEND = "httpx"
+    def _build_headers() -> dict:
+        h = {"Content-Type": "application/json", "Accept": "application/json"}
+        if API_KEY:
+            h["X-API-Key"] = API_KEY
+        return h
+    async def http_get(path: str) -> tuple[int, dict]:
+        url = BASE_URL + path
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            resp = await client.get(url, headers=_build_headers())
+            try:
+                return resp.status_code, resp.json()
+            except Exception:
+                return resp.status_code, {"_raw": resp.text[:200]}
+    async def http_post(path: str, body: dict) -> tuple[int, dict]:
+        url = BASE_URL + path
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            resp = await client.post(url, json=body, headers=_build_headers())
+            try:
+                return resp.status_code, resp.json()
+            except Exception:
+                return resp.status_code, {"_raw": resp.text[:200]}
+    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
+        """SSE 스트리밍 POST. 청크를 수집하여 파싱된 이벤트 목록을 반환한다."""
+        url = BASE_URL + path
+        h = _build_headers()
+        h["Accept"] = "text/event-stream"
+        events: list[dict] = []
+        status_code = 0
+        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
+            async with client.stream("POST", url, json=body, headers=h) as resp:
+                status_code = resp.status_code
+                async for line in resp.aiter_lines():
+                    line = line.strip()
+                    if not line.startswith("data:"):
+                        continue
+                    payload = line[len("data:") :].strip()
+                    if not payload:
+                        continue
+                    try:
+                        events.append(json.loads(payload))
+                    except json.JSONDecodeError:
+                        events.append({"_raw": payload})
+        return status_code, events
+except ImportError:
+    import urllib.error
+    import urllib.request
+    _HTTP_BACKEND = "urllib"
+    def _build_headers() -> dict:
+        h = {"Content-Type": "application/json", "Accept": "application/json"}
+        if API_KEY:
+            h["X-API-Key"] = API_KEY
+        return h
+    async def http_get(path: str) -> tuple[int, dict]:
+        url = BASE_URL + path
+        req = urllib.request.Request(url, headers=_build_headers(), method="GET")
+        try:
+            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
+                return r.status, json.loads(r.read().decode())
+        except urllib.error.HTTPError as e:
+            return e.code, {}
+    async def http_post(path: str, body: dict) -> tuple[int, dict]:
+        url = BASE_URL + path
+        data = json.dumps(body).encode()
+        req = urllib.request.Request(url, data=data, headers=_build_headers(), method="POST")
+        try:
+            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
+                return r.status, json.loads(r.read().decode())
+        except urllib.error.HTTPError as e:
+            return e.code, {}
+    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
+        """urllib fallback: SSE 스트리밍을 동기 방식으로 읽는다."""
+        url = BASE_URL + path
+        data = json.dumps(body).encode()
+        h = _build_headers()
+        h["Accept"] = "text/event-stream"
+        req = urllib.request.Request(url, data=data, headers=h, method="POST")
+        events: list[dict] = []
+        status_code = 0
+        try:
+            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
+                status_code = r.status
+                for raw_line in r:
+                    line = raw_line.decode("utf-8", errors="replace").strip()
+                    if not line.startswith("data:"):
+                        continue
+                    payload = line[len("data:") :].strip()
+                    if not payload:
+                        continue
+                    try:
+                        events.append(json.loads(payload))
+                    except json.JSONDecodeError:
+                        events.append({"_raw": payload})
+        except urllib.error.HTTPError as e:
+            status_code = e.code
+        return status_code, events
+# ---------------------------------------------------------------------------
+# 결과 기록 / 출력 헬퍼
+# ---------------------------------------------------------------------------
+def _record(
+    scenario_num: int,
+    name: str,
+    passed: bool,
+    elapsed: float,
+    error: Optional[str] = None,
+    detail: Optional[Any] = None,
+) -> dict:
+    tag = "[PASS]" if passed else "[FAIL]"
+    suffix = f"({elapsed:.2f}s)"
+    if passed:
+        print(f"{tag} Scenario {scenario_num}: {name} {suffix}")
+    else:
+        print(f"{tag} Scenario {scenario_num}: {name} — {error} {suffix}")
+    entry = {
+        "scenario": scenario_num,
+        "name": name,
+        "passed": passed,
+        "elapsed_s": round(elapsed, 3),
+        "error": error,
+        "detail": detail,
+    }
+    _results.append(entry)
+    return entry
+def _extract_text_from_events(events: list[dict]) -> str:
+    """SSE 이벤트 목록에서 최종 텍스트를 추출한다.
+    v2/agent/stream 이벤트 구조:
+      - synthesis 노드: {"node": "synthesis", "final_text": "..."}
+      - v1/agent/stream 이벤트: {"text": "...", "finished": true}
+    """
+    # synthesis 노드 final_text 우선
+    for ev in reversed(events):
+        if ev.get("node") == "synthesis" and ev.get("final_text"):
+            return ev["final_text"]
+    # v1 스트리밍 호환: finished=true인 마지막 이벤트의 text
+    for ev in reversed(events):
+        if ev.get("finished") and ev.get("text"):
+            return ev["text"]
+    # 전체 이벤트에서 non-empty text를 이어붙인다 (fallback)
+    chunks = [ev.get("text", "") or ev.get("final_text", "") for ev in events]
+    return "".join(c for c in chunks if c)
+def _contains_legal_keyword(text: str) -> bool:
+    return any(re.search(pattern, text) for pattern in LEGAL_PATTERNS)
+# ---------------------------------------------------------------------------
+# 시나리오 구현
+# ---------------------------------------------------------------------------
+async def scenario1_health_check() -> dict:
+    """Scenario 1: Health Check."""
+    t0 = time.monotonic()
+    try:
+        status_code, body = await http_get("/health")
+        elapsed = time.monotonic() - t0
+        if status_code != 200:
+            return _record(1, "Health Check", False, elapsed, f"HTTP {status_code}", {"body": body})
+        # api_server.py: /health는 "status": "healthy" 반환
+        srv_status = body.get("status", "")
+        if srv_status not in ("ok", "healthy"):
+            return _record(
+                1,
+                "Health Check",
+                False,
+                elapsed,
+                f"status 필드가 ok/healthy가 아님: {srv_status!r}",
+                {"body": body},
+            )
+        return _record(1, "Health Check", True, elapsed, detail={"status": srv_status})
+    except Exception as exc:
+        return _record(1, "Health Check", False, time.monotonic() - t0, str(exc))
+async def scenario2_base_model_generation() -> dict:
+    """Scenario 2: Base Model Generation (OpenAI-compatible /v1/completions).
+    vLLM이 /v1/completions 엔드포인트를 직접 노출한다.
+    GovOn api_server.py에 구현되어 있지 않으므로 vLLM 레이어 엔드포인트를 사용한다.
+    서버가 /v1/completions를 지원하지 않으면 /v1/generate 레거시로 fallback한다.
+    """
+    t0 = time.monotonic()
+    body_completions = {
+        "model": BASE_MODEL,
+        "prompt": "대한민국 수도는 어디입니까?",
+        "max_tokens": 64,
+        "temperature": 0.0,
+    }
+    try:
+        status_code, resp = await http_post("/v1/completions", body_completions)
+        elapsed = time.monotonic() - t0
+        # vLLM /v1/completions 응답 구조 확인
+        if status_code == 200:
+            choices = resp.get("choices", [])
+            if choices and choices[0].get("text") is not None:
+                text = choices[0]["text"]
+                return _record(
+                    2,
+                    "Base Model Generation",
+                    True,
+                    elapsed,
+                    detail={"endpoint": "/v1/completions", "text_preview": text[:100]},
+                )
+            return _record(
+                2, "Base Model Generation", False, elapsed, "choices[0].text 없음", {"resp": resp}
+            )
+        # /v1/completions 미지원 시 /v1/generate 레거시로 fallback
+        body_legacy = {
+            "prompt": "대한민국 수도는 어디입니까?",
+            "max_tokens": 64,
+            "temperature": 0.0,
+            "use_rag": False,
+        }
+        status_code2, resp2 = await http_post("/v1/generate", body_legacy)
+        elapsed2 = time.monotonic() - t0
+        if status_code2 == 200 and resp2.get("text"):
+            return _record(
+                2,
+                "Base Model Generation",
+                True,
+                elapsed2,
+                detail={"endpoint": "/v1/generate (fallback)", "text_preview": resp2["text"][:100]},
+            )
+        return _record(
+            2,
+            "Base Model Generation",
+            False,
+            elapsed2,
+            f"/v1/completions HTTP {status_code}, /v1/generate HTTP {status_code2}",
+            {"completions_resp": resp, "generate_resp": resp2},
+        )
+    except Exception as exc:
+        return _record(2, "Base Model Generation", False, time.monotonic() - t0, str(exc))
+async def _call_agent(
+    message: str,
+    session_id: str,
+    use_stream: bool = True,
+) -> tuple[bool, str, Optional[str]]:
+    """에이전트 엔드포인트를 호출하고 (성공여부, 응답텍스트, 에러) 를 반환한다.
+    v2/agent/stream (SSE) → v2/agent/run (REST) 순으로 시도한다.
+    use_rag=False를 기본으로 전달하여 LoRA 경로를 강제한다.
+    """
+    body = {"query": message, "session_id": session_id, "use_rag": False}
+    # v2/agent/stream 시도 (SSE)
+    if use_stream:
+        try:
+            status_code, events = await http_post_sse("/v2/agent/stream", body)
+            if status_code == 200 and events:
+                text = _extract_text_from_events(events)
+                if text:
+                    return True, text, None
+                # 이벤트는 수신했지만 text가 없는 경우 — error 이벤트 확인
+                for ev in events:
+                    if ev.get("status") == "error":
+                        return False, "", ev.get("error", "unknown error")
+                # __interrupt__ 또는 awaiting_approval 이벤트 → 자동 승인 후 최종 텍스트 수집
+                # LangGraph interrupt()는 "__interrupt__" 노드로 emit됨
+                awaiting = next(
+                    (
+                        ev
+                        for ev in events
+                        if ev.get("status") == "awaiting_approval"
+                        or ev.get("node") == "__interrupt__"
+                    ),
+                    None,
+                )
+                if awaiting:
+                    thread_id = awaiting.get("thread_id") or session_id
+                    try:
+                        approve_code, approve_resp = await http_post(
+                            f"/v2/agent/approve?thread_id={thread_id}&approved=true", {}
+                        )
+                        if approve_code == 200:
+                            final_text = approve_resp.get("text", "") or approve_resp.get(
+                                "final_text", ""
+                            )
+                            if final_text:
+                                return True, final_text, None
+                            return False, "", f"approve 200 but text 없음: {approve_resp}"
+                        return False, "", f"approve HTTP {approve_code}: {approve_resp}"
+                    except Exception as approve_exc:
+                        return False, "", f"approve 호출 실패: {approve_exc}"
+                return False, "", f"SSE 이벤트 수신했으나 text 없음 (events={len(events)})"
+        except Exception as exc:
+            logger.warning("Stream error: %s", exc)  # fallback to /v2/agent/run
+    # v2/agent/run 시도 (REST)
+    try:
+        status_code, resp = await http_post("/v2/agent/run", body)
+        if status_code == 200:
+            text = resp.get("text", "") or resp.get("final_text", "")
+            if resp.get("status") == "error":
+                return False, text, resp.get("error", "agent run error")
+            if text:
+                return True, text, None
+            # awaiting_approval 상태 — 실제 텍스트 생성 없음으로 failure 처리
+            if resp.get("status") == "awaiting_approval":
+                return (
+                    False,
+                    "",
+                    f"awaiting_approval: 텍스트 미생성 (thread_id={resp.get('thread_id')})",
+                )
+            return False, "", f"text 없음, status={resp.get('status')}"
+        return False, "", f"HTTP {status_code}: {resp}"
+    except Exception as exc:
+        return False, "", str(exc)
+# Session ID generated once per run. In this part of the file only scenario 3
+# passes it to _call_agent; scenario 4 creates its own UUID session.
+_RUN_SESSION_ID = str(uuid4())
+async def scenario3_civil_lora() -> dict:
+    """Scenario 3: Civil LoRA — draft_civil_response (v2/agent/stream)."""
+    t0 = time.monotonic()
+    try:
+        ok, text, err = await _call_agent(
+            message="주차 위반 과태료 이의신청 민원에 대한 답변 초안을 작성해줘",
+            session_id=_RUN_SESSION_ID,
+        )
+        elapsed = time.monotonic() - t0
+        if not ok:
+            return _record(
+                3,
+                "Civil LoRA (draft_civil_response)",
+                False,
+                elapsed,
+                err,
+                {"text_preview": text[:200] if text else ""},
+            )
+        if not text.strip():
+            return _record(
+                3, "Civil LoRA (draft_civil_response)", False, elapsed, "응답 텍스트가 비어있음"
+            )
+        return _record(
+            3,
+            "Civil LoRA (draft_civil_response)",
+            True,
+            elapsed,
+            detail={"text_preview": text[:200]},
+        )
+    except Exception as exc:
+        return _record(
+            3, "Civil LoRA (draft_civil_response)", False, time.monotonic() - t0, str(exc)
+        )
+async def scenario4_legal_lora() -> dict:
+    """Scenario 4: Legal LoRA — append_evidence (v2/agent/stream).
+    독립 세션에서 민원 답변 초안 요청 후 동일 세션에서 법령 근거 보강을 요청한다.
+    응답에 법령/조항 관련 패턴이 포함되어 있는지 확인한다.
+    """
+    t0 = time.monotonic()
+    session_id = str(uuid4())
+    try:
+        # 동일 세션에서 civil 요청 먼저 (append_evidence는 이전 답변 컨텍스트 필요)
+        ok_civil, _, err_civil = await _call_agent(
+            message="건축 허가 신청 민원에 대한 답변 초안을 작성해줘",
+            session_id=session_id,
+        )
+        if not ok_civil:
+            elapsed = time.monotonic() - t0
+            return _record(
+                4,
+                "Legal LoRA (append_evidence)",
+                False,
+                elapsed,
+                f"civil 선행 요청 실패: {err_civil}",
+            )
+        ok, text, err = await _call_agent(
+            message="위 답변에 관련 법령과 판례 근거를 보강해줘",
+            session_id=session_id,
+        )
+        elapsed = time.monotonic() - t0
+        if not ok:
+            return _record(
+                4,
+                "Legal LoRA (append_evidence)",
+                False,
+                elapsed,
+                err,
+                {"text_preview": text[:200] if text else ""},
+            )
+        if not text.strip():
+            return _record(
+                4, "Legal LoRA (append_evidence)", False, elapsed, "응답 텍스트가 비어있음"
+            )
+        has_legal = _contains_legal_keyword(text)
+        matched = [p for p in LEGAL_PATTERNS if re.search(p, text)]
+        detail = {
+            "has_legal_keyword": has_legal,
+            "matched_patterns": matched,
+            "text_preview": text[:300],
+        }
+        if not has_legal:
+            return _record(
+                4,
+                "Legal LoRA (append_evidence)",
+                False,
+                elapsed,
+                f"법령 패턴 미발견 ({LEGAL_PATTERNS[:3]}...)",
+                detail,
+            )
+        return _record(4, "Legal LoRA (append_evidence)", True, elapsed, detail=detail)
+    except Exception as exc:
+        return _record(4, "Legal LoRA (append_evidence)", False, time.monotonic() - t0, str(exc))
+async def scenario5_sequential_multi_lora_switching() -> dict:
+    """Scenario 5: Sequential Multi-LoRA Switching (civil → legal x3).
+    civil 요청 → legal 요청을 3회 반복하여 LoRA 전환 오류가 없는지 확인한다.
+    반복마다 별도의 UUID 세션 ID를 사용한다.
+    """
+    t0 = time.monotonic()
+    errors: list[str] = []
+    iterations = 3
+    for i in range(1, iterations + 1):
+        session_id = str(uuid4())
+        # civil 요청
+        ok, text, err = await _call_agent(
+            message="행정처분 이의신청 민원 답변 초안을 작성해줘",
+            session_id=session_id,
+        )
+        if not ok or not text.strip():
+            errors.append(f"iter {i} civil: {err or '빈 응답'}")
+            continue
+        # legal 요청 (동일 세션)
+        ok2, text2, err2 = await _call_agent(
+            message="위 답변에 관련 법령 근거를 추가해줘",
+            session_id=session_id,
+        )
+        if not ok2 or not text2.strip():
+            errors.append(f"iter {i} legal: {err2 or '빈 응답'}")
+    elapsed = time.monotonic() - t0
+    if errors:
+        return _record(
+            5,
+            "Sequential Multi-LoRA Switching",
+            False,
+            elapsed,
+            "; ".join(errors),
+            {"iterations": iterations, "errors": errors},
+        )
+    return _record(
+        5,
+        "Sequential Multi-LoRA Switching",
+        True,
+        elapsed,
+        detail={"iterations": iterations, "all_passed": True},
+    )
+async def scenario6_lora_id_consistency() -> dict:
+    """Scenario 6: LoRA ID Consistency Check (정보성).
+    /v1/models (vLLM OpenAI-compatible)에서 civil/legal 어댑터 노출 여부를 확인한다.
+    vLLM은 버전/설정에 따라 LoRA 어댑터를 /v1/models에 노출하지 않을 수 있으므로,
+    미감지 시 FAIL이 아닌 WARNING으로 기록하고 전체 결과에 영향을 주지 않는다.
+    """
+    t0 = time.monotonic()
+    try:
+        status_code, health = await http_get("/health")
+        elapsed = time.monotonic() - t0
+        if status_code != 200:
+            return _record(
+                6, "LoRA ID Consistency Check", False, elapsed, f"/health HTTP {status_code}"
+            )
+        detail: dict = {"health_status": health.get("status")}
+        # /health feature_flags / agents_loaded 정보 기록
+        detail["agents_loaded"] = health.get("agents_loaded", [])
+        detail["model"] = health.get("model", "")
+        detail["feature_flags"] = health.get("feature_flags", {})
+        civil_found = False
+        legal_found = False
+        # /v1/models 시도 (vLLM OpenAI-compatible)
+        try:
+            models_status, models_resp = await http_get("/v1/models")
+            if models_status == 200:
+                model_ids = [m.get("id", "") for m in models_resp.get("data", [])]
+                detail["v1_models"] = model_ids
+                civil_found = any("civil" in mid for mid in model_ids)
+                legal_found = any("legal" in mid for mid in model_ids)
+                detail["civil_adapter_in_models"] = civil_found
+                detail["legal_adapter_in_models"] = legal_found
+        except Exception as exc:
+            logger.warning("Failed to fetch /v1/models: %s", exc)
+            detail["v1_models"] = "unavailable"
+        # vLLM이 /v1/models에 어댑터를 노출하지 않을 수 있으므로 정보성 기록만 수행
+        if not civil_found or not legal_found:
+            missing = []
+            if not civil_found:
+                missing.append("civil")
+            if not legal_found:
+                missing.append("legal")
+            detail["warning"] = f"어댑터 미감지 (vLLM 버전에 따라 정상): {', '.join(missing)}"
+            logger.warning(detail["warning"])
+        return _record(6, "LoRA ID Consistency Check", True, time.monotonic() - t0, detail=detail)
+    except Exception as exc:
+        return _record(6, "LoRA ID Consistency Check", False, time.monotonic() - t0, str(exc))
+# ---------------------------------------------------------------------------
+# 메인 러너
+# ---------------------------------------------------------------------------
+async def main() -> int:
+    print("GovOn Legal LoRA 서빙 통합 검증")
+    print(f"  대상 서버: {BASE_URL}")
+    print(f"  인증: {'API_KEY 설정됨' if API_KEY else '미설정 (비인증)'}")
+    print(f"  HTTP 백엔드: {_HTTP_BACKEND}")
+    print(f"  타임아웃: {TIMEOUT}s / 시나리오")
+    print("-" * 60)
+    scenarios = [
+        scenario1_health_check,
+        scenario2_base_model_generation,
+        scenario3_civil_lora,
+        scenario4_legal_lora,
+        scenario5_sequential_multi_lora_switching,
+        scenario6_lora_id_consistency,
+    ]
+    for fn in scenarios:
+        await fn()
+    print("-" * 60)
+    passed = sum(1 for r in _results if r["passed"])
+    failed = len(_results) - passed
+    print(f"결과: {passed}/{len(_results)} 통과, {failed} 실패")
+    # JSON 결과 저장
+    output = {
+        "server_url": BASE_URL,
+        "http_backend": _HTTP_BACKEND,
+        "total": len(_results),
+        "passed": passed,
+        "failed": failed,
+        "scenarios": _results,
+    }
+    with open(RESULTS_PATH, "w", encoding="utf-8") as f:
+        json.dump(output, f, ensure_ascii=False, indent=2)
+    print(f"결과 저장: {RESULTS_PATH}")
+    return 0 if failed == 0 else 1
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    sys.exit(exit_code)

scripts/verify_results.json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "server_url": "https://umyunsang-govon-runtime.hf.space",
+  "http_backend": "httpx",
+  "total": 6,
+  "passed": 5,
+  "failed": 1,
+  "scenarios": [
+    {
+      "scenario": 1,
+      "name": "Health Check",
+      "passed": true,
+      "elapsed_s": 1.092,
+      "error": null,
+      "detail": {
+        "status": "healthy"
+      }
+    },
+    {
+      "scenario": 2,
+      "name": "Base Model Generation",
+      "passed": true,
+      "elapsed_s": 7.963,
+      "error": null,
+      "detail": {
+        "endpoint": "/v1/generate (fallback)",
+        "text_preview": "We are dealing with a very basic factual question about the capital of South Korea. \n The user's que"
+      }
+    },
+    {
+      "scenario": 3,
+      "name": "Civil LoRA (draft_civil_response)",
+      "passed": true,
+      "elapsed_s": 38.797,
+      "error": null,
+      "detail": {
+        "text_preview": "요청을 처리할 수 없습니다."
+      }
+    },
+    {
+      "scenario": 4,
+      "name": "Legal LoRA (append_evidence)",
+      "passed": false,
+      "elapsed_s": 25.521,
+      "error": "법령 패턴 미발견 (['제\\\\s*\\\\d+\\\\s*조', '제\\\\s*\\\\d+\\\\s*항', '법률']...)",
+      "detail": {
+        "has_legal_keyword": false,
+        "matched_patterns": [],
+        "text_preview": "요청을 처리할 수 없습니다."
+      }
+    },
+    {
+      "scenario": 5,
+      "name": "Sequential Multi-LoRA Switching",
+      "passed": true,
+      "elapsed_s": 146.962,
+      "error": null,
+      "detail": {
+        "iterations": 3,
+        "all_passed": true
+      }
+    },
+    {
+      "scenario": 6,
+      "name": "LoRA ID Consistency Check",
+      "passed": true,
+      "elapsed_s": 1.889,
+      "error": null,
+      "detail": {
+        "health_status": "healthy",
+        "agents_loaded": [
+          "generator_civil_response",
+          "retriever"
+        ],
+        "model": "LGAI-EXAONE/EXAONE-4.0-32B-AWQ",
+        "feature_flags": {
+          "use_rag_pipeline": true,
+          "model_version": "v2_lora"
+        },
+        "warning": "어댑터 미감지 (vLLM 버전에 따라 정상): civil, legal"
+      }
+    }
+  ]
+}