umyunsang committed on
Commit
769e684
·
verified ·
1 Parent(s): 7ec9957

sync: scripts/ (verify_e2e_tool_calling.py)

Browse files
scripts/.DS_Store ADDED
Binary file (6.15 kB). View file
 
scripts/__pycache__/verify_e2e_tool_calling.cpython-313.pyc ADDED
Binary file (61.3 kB). View file
 
scripts/deploy-hfspace.sh ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Script that deploys the GovOn Runtime to HuggingFace Spaces
5
+ # Usage: ./scripts/deploy-hfspace.sh
6
+
7
+ export SPACE_REPO="${SPACE_REPO:-umyunsang/govon-runtime}"
8
+ export HF_TOKEN="${HF_TOKEN:?HF_TOKEN ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค}"
9
+
10
+ echo "=== GovOn HF Spaces ๋ฐฐํฌ ==="
11
+ echo "Space: $SPACE_REPO"
12
+
13
+ # 1. Space ์ƒ์„ฑ (์ด๋ฏธ ์žˆ์œผ๋ฉด skip)
14
+ python3 -c "
15
+ import os
16
+ from huggingface_hub import create_repo
17
+ is_private = os.environ.get('SPACE_PRIVATE', 'false').lower() in ('true', '1', 'yes')
18
+ create_repo(os.environ['SPACE_REPO'], repo_type='space', space_sdk='docker', exist_ok=True, token=os.environ['HF_TOKEN'], private=is_private)
19
+ print('Space repo ready')
20
+ "
21
+
22
+ # 2. ํ•„์š” ํŒŒ์ผ ์—…๋กœ๋“œ
23
+ python3 -c "
24
+ import os
25
+ from huggingface_hub import HfApi
26
+ api = HfApi(token=os.environ['HF_TOKEN'])
27
+ space_repo = os.environ['SPACE_REPO']
28
+
29
+ # Dockerfile
30
+ api.upload_file(path_or_fileobj='Dockerfile.hfspace', path_in_repo='Dockerfile',
31
+ repo_id=space_repo, repo_type='space')
32
+
33
+ # requirements.txt
34
+ api.upload_file(path_or_fileobj='requirements.txt', path_in_repo='requirements.txt',
35
+ repo_id=space_repo, repo_type='space')
36
+
37
+ # src/ ๋””๋ ‰ํ„ฐ๋ฆฌ
38
+ api.upload_folder(folder_path='src', path_in_repo='src',
39
+ repo_id=space_repo, repo_type='space',
40
+ ignore_patterns=['__pycache__', '*.pyc', '.pytest_cache'])
41
+
42
+ # agents/ ๋””๋ ‰ํ„ฐ๋ฆฌ (์กด์žฌํ•˜๋ฉด)
43
+ if os.path.isdir('agents'):
44
+ api.upload_folder(folder_path='agents', path_in_repo='agents',
45
+ repo_id=space_repo, repo_type='space')
46
+
47
+ print('Files uploaded')
48
+ "
49
+
50
+ # 3. Secrets ์„ค์ •
51
+ python3 -c "
52
+ import os
53
+ from huggingface_hub import HfApi
54
+ api = HfApi(token=os.environ['HF_TOKEN'])
55
+ api.add_space_secret(os.environ['SPACE_REPO'], 'HF_TOKEN', os.environ['HF_TOKEN'])
56
+ # ADAPTER_PATHS: HF Hub repo ID ๋ฐฉ์‹ (vLLM์ด ์ž๋™ ๋‹ค์šด๋กœ๋“œ)
57
+ adapter_paths = os.environ.get('ADAPTER_PATHS', 'civil=umyunsang/govon-civil-adapter,legal=siwo/govon-legal-adapter')
58
+ api.add_space_secret(os.environ['SPACE_REPO'], 'ADAPTER_PATHS', adapter_paths)
59
+ print('Secrets configured')
60
+ "
61
+
62
+ # 4. ํ•˜๋“œ์›จ์–ด ์„ค์ • (L4 24GB โ€” base 20GB + adapters ~1GB)
63
+ # ๊ถŒํ•œ/์ฟผํ„ฐ ๋ถ€์กฑ ์‹œ ๊ฒฝ๊ณ ๋งŒ ์ถœ๋ ฅํ•˜๊ณ  ๊ณ„์† ์ง„ํ–‰
64
+ python3 -c "
65
+ import os
66
+ from huggingface_hub import HfApi
67
+ api = HfApi(token=os.environ['HF_TOKEN'])
68
+ try:
69
+ api.request_space_hardware(os.environ['SPACE_REPO'], 'l4x1')
70
+ print('Hardware set to l4x1 (24GB VRAM)')
71
+ except Exception as e:
72
+ print(f'WARNING: ํ•˜๋“œ์›จ์–ด ์„ค์ • ์‹คํŒจ (์ˆ˜๋™์œผ๋กœ ์„ค์ • ํ•„์š”): {e}')
73
+ " || true
74
+
75
+ echo ""
76
+ echo "=== ๋ฐฐํฌ ์™„๋ฃŒ ==="
77
+ echo "Space URL: https://huggingface.co/spaces/$SPACE_REPO"
78
+ echo "ํ•˜๋“œ์›จ์–ด: L4 24GB (์ž๋™ ์„ค์ •๋จ)"
79
+ echo ""
80
+ echo "GPU ๊ฒ€์ฆ ์‹คํ–‰:"
81
+ echo " GOVON_RUNTIME_URL=https://<space-url> python3 scripts/verify_lora_serving.py"
scripts/deploy.sh ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
5
+ # GovOn Blue/Green Deployment Script
6
+ #
7
+ # Usage:
8
+ # ./scripts/deploy.sh deploy <image-tag> Deploy new version
9
+ # ./scripts/deploy.sh rollback Rollback to previous version
10
+ # ./scripts/deploy.sh status Show current deployment status
11
+ # ./scripts/deploy.sh health Check health of active deployment
12
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
13
+
14
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15
+ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
16
+ COMPOSE_FILE="${PROJECT_DIR}/docker-compose.prod.yml"
17
+ STATE_FILE="${PROJECT_DIR}/.deploy-state"
18
+ HEALTH_TIMEOUT=120
19
+ HEALTH_INTERVAL=5
20
+
21
+ # Colors
22
+ RED='\033[0;31m'
23
+ GREEN='\033[0;32m'
24
+ YELLOW='\033[1;33m'
25
+ NC='\033[0m'
26
+
27
+ log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
28
+ log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
29
+ log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
30
+
31
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
32
+ # State management
33
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
+
35
# Print the currently active deployment slot: "blue", "green" or "none".
#
# Reads the first line of $STATE_FILE. A missing, empty or unrecognised state
# file is reported as "none" instead of being echoed verbatim, because callers
# pass the result straight to `docker compose --profile` and compare it
# against the literal "none".
get_active_slot() {
    local slot=""

    if [ -f "$STATE_FILE" ]; then
        # `read` returns non-zero when the file has no trailing newline (or is
        # unreadable); the value read so far is still usable, so do not let
        # `set -e` abort here.
        IFS= read -r slot < "$STATE_FILE" || true
        # Drop stray whitespace / carriage returns from hand-edited files.
        slot="${slot//[[:space:]]/}"
    fi

    case "$slot" in
        blue|green) echo "$slot" ;;
        *)          echo "none" ;;
    esac
}
42
+
43
# Print the slot that is not currently active.
#
# "green" is printed when the active slot is "blue"; every other answer from
# get_active_slot (including "none") yields "blue".
get_inactive_slot() {
    local current
    current=$(get_active_slot)

    case "$current" in
        blue) echo "green" ;;
        *)    echo "blue" ;;
    esac
}
52
+
53
# Print the host port for a slot name given as $1.
#
# "blue" maps to 8001; any other value maps to 8002.
get_slot_port() {
    local slot_name=$1

    case "$slot_name" in
        blue) echo 8001 ;;
        *)    echo 8002 ;;
    esac
}
57
+
58
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
59
+ # Health check with retry
60
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
61
+
62
# Poll http://localhost:<port>/health until it answers successfully.
#
# Arguments:
#   $1  port to probe
# Globals read:
#   HEALTH_TIMEOUT   overall budget in seconds
#   HEALTH_INTERVAL  pause between attempts, also used as the per-request cap
# Returns:
#   0 as soon as one probe succeeds, 1 once the budget is exhausted.
#
# The budget is measured against the shell's wall clock ($SECONDS) and every
# request is bounded with --max-time, so a connection that hangs cannot
# stretch the wait beyond HEALTH_TIMEOUT by more than one request.
wait_for_health() {
    local port=$1
    local started=$SECONDS
    local deadline=$((SECONDS + HEALTH_TIMEOUT))
    local elapsed=0
    log_info "헬스체크 대기 중 (포트: ${port}, 타임아웃: ${HEALTH_TIMEOUT}초)..."

    while [ "$SECONDS" -lt "$deadline" ]; do
        if curl -sf --max-time "$HEALTH_INTERVAL" \
                "http://localhost:${port}/health" > /dev/null 2>&1; then
            elapsed=$((SECONDS - started))
            echo ""
            log_info "헬스체크 통과 (${elapsed}초 소요)"
            return 0
        fi
        sleep "$HEALTH_INTERVAL"
        # Progress dot; the line is terminated by the `echo ""` on exit.
        printf "."
    done

    echo ""
    log_error "헬스체크 실패 (${HEALTH_TIMEOUT}초 타임아웃)"
    return 1
}
82
+
83
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
84
+ # Prerequisites check
85
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
86
+
87
+ check_prerequisites() {
88
+ if ! command -v docker &>/dev/null; then
89
+ log_error "Docker๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."
90
+ exit 1
91
+ fi
92
+
93
+ if ! docker compose version &>/dev/null; then
94
+ log_error "Docker Compose๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."
95
+ exit 1
96
+ fi
97
+
98
+ if [ ! -f "$COMPOSE_FILE" ]; then
99
+ log_error "Compose ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: ${COMPOSE_FILE}"
100
+ exit 1
101
+ fi
102
+ }
103
+
104
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
105
+ # Deploy new version
106
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
107
+
108
+ cmd_deploy() {
109
+ local image_tag="${1:-latest}"
110
+ local active
111
+ local target
112
+ local target_port
113
+
114
+ active=$(get_active_slot)
115
+ target=$(get_inactive_slot)
116
+ target_port=$(get_slot_port "$target")
117
+
118
+ check_prerequisites
119
+
120
+ log_info "=== GovOn ๋ฐฐํฌ ์‹œ์ž‘: v${image_tag} ==="
121
+ log_info "ํ˜„์žฌ ํ™œ์„ฑ ์Šฌ๋กฏ: ${active}"
122
+ log_info "๋ฐฐํฌ ๋Œ€์ƒ ์Šฌ๋กฏ: ${target}"
123
+ echo ""
124
+
125
+ # Set the tag for the target slot
126
+ if [ "$target" = "blue" ]; then
127
+ export BLUE_TAG="$image_tag"
128
+ else
129
+ export GREEN_TAG="$image_tag"
130
+ fi
131
+
132
+ # Pull new image
133
+ log_info "์ด๋ฏธ์ง€ ํ’€๋ง: ghcr.io/govon-org/govon:${image_tag}..."
134
+ docker pull "ghcr.io/govon-org/govon:${image_tag}"
135
+
136
+ # Create volume directories
137
+ mkdir -p "${PROJECT_DIR}/models" "${PROJECT_DIR}/data" "${PROJECT_DIR}/agents" "${PROJECT_DIR}/configs"
138
+
139
+ # Start target slot
140
+ log_info "${target} ์Šฌ๋กฏ ์‹œ์ž‘ ์ค‘..."
141
+ docker compose -f "$COMPOSE_FILE" --profile "$target" up -d
142
+
143
+ # Wait for health
144
+ if wait_for_health "$target_port"; then
145
+ log_info "${target} ๋ฐฐํฌ๊ฐ€ ์ •์ƒ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค!"
146
+
147
+ # Update state
148
+ echo "$target" > "$STATE_FILE"
149
+ log_info "ํ™œ์„ฑ ์Šฌ๋กฏ ๋ณ€๊ฒฝ: ${active} -> ${target}"
150
+
151
+ # Stop previous slot
152
+ if [ "$active" != "none" ]; then
153
+ log_info "์ด์ „ ${active} ์Šฌ๋กฏ ์ค‘์ง€ ์ค‘..."
154
+ docker compose -f "$COMPOSE_FILE" --profile "$active" down
155
+ fi
156
+
157
+ echo ""
158
+ log_info "=== ๋ฐฐํฌ ์™„๋ฃŒ ==="
159
+ cmd_status
160
+ else
161
+ log_error "๋ฐฐํฌ ์‹คํŒจ! ๋กค๋ฐฑ ์ˆ˜ํ–‰ ์ค‘..."
162
+ docker compose -f "$COMPOSE_FILE" --profile "$target" down
163
+ log_error "์‹คํŒจํ•œ ๋ฐฐํฌ๋ฅผ ์ •๋ฆฌํ–ˆ์Šต๋‹ˆ๋‹ค. ์ด์ „ ๋ฒ„์ „์ด ๊ณ„์† ํ™œ์„ฑ ์ƒํƒœ์ž…๋‹ˆ๋‹ค."
164
+ exit 1
165
+ fi
166
+ }
167
+
168
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
169
+ # Rollback to previous version
170
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
171
+
172
+ cmd_rollback() {
173
+ local active
174
+ local previous
175
+ local prev_port
176
+
177
+ active=$(get_active_slot)
178
+ previous=$(get_inactive_slot)
179
+ prev_port=$(get_slot_port "$previous")
180
+
181
+ check_prerequisites
182
+
183
+ if [ "$active" = "none" ]; then
184
+ log_error "๋กค๋ฐฑํ•  ํ™œ์„ฑ ๋ฐฐํฌ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
185
+ exit 1
186
+ fi
187
+
188
+ log_warn "=== ๋กค๋ฐฑ ์‹œ์ž‘: ${active} -> ${previous} ==="
189
+
190
+ # Start previous slot
191
+ docker compose -f "$COMPOSE_FILE" --profile "$previous" up -d
192
+
193
+ if wait_for_health "$prev_port"; then
194
+ # Stop current active
195
+ docker compose -f "$COMPOSE_FILE" --profile "$active" down
196
+ echo "$previous" > "$STATE_FILE"
197
+ echo ""
198
+ log_info "=== ๋กค๋ฐฑ ์™„๋ฃŒ. ํ™œ์„ฑ ์Šฌ๋กฏ: ${previous} ==="
199
+ cmd_status
200
+ else
201
+ log_error "๋กค๋ฐฑ ์‹คํŒจ! ์ˆ˜๋™ ์กฐ์น˜๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค."
202
+ log_error "ํ˜„์žฌ ํ™œ์„ฑ ์Šฌ๋กฏ(${active})์€ ๊ทธ๋Œ€๋กœ ์œ ์ง€๋ฉ๋‹ˆ๋‹ค."
203
+ docker compose -f "$COMPOSE_FILE" --profile "$previous" down
204
+ exit 1
205
+ fi
206
+ }
207
+
208
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
209
+ # Show deployment status
210
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
211
+
212
+ cmd_status() {
213
+ local active
214
+ active=$(get_active_slot)
215
+ local blue_status
216
+ local green_status
217
+
218
+ blue_status=$(docker ps --filter name=govon-blue --format '{{.Status}}' 2>/dev/null || echo "stopped")
219
+ green_status=$(docker ps --filter name=govon-green --format '{{.Status}}' 2>/dev/null || echo "stopped")
220
+
221
+ [ -z "$blue_status" ] && blue_status="stopped"
222
+ [ -z "$green_status" ] && green_status="stopped"
223
+
224
+ echo ""
225
+ echo "========================================"
226
+ echo " GovOn ๋ฐฐํฌ ์ƒํƒœ"
227
+ echo "========================================"
228
+ echo " ํ™œ์„ฑ ์Šฌ๋กฏ : ${active}"
229
+ echo " Blue (8001): ${blue_status}"
230
+ echo " Green (8002): ${green_status}"
231
+ echo "========================================"
232
+ }
233
+
234
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
235
+ # Health check
236
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
237
+
238
# `health` sub-command: probe the /health endpoint of the active slot once.
#
# Exits 1 when no slot is active or when the probe fails; otherwise logs
# success and returns normally. The request is capped with --max-time so a
# hung service is reported as unhealthy instead of blocking the command.
cmd_health() {
    local active
    local port

    active=$(get_active_slot)
    if [ "$active" = "none" ]; then
        log_error "활성 배포가 없습니다."
        exit 1
    fi

    port=$(get_slot_port "$active")

    if curl -sf --max-time "$HEALTH_INTERVAL" \
            "http://localhost:${port}/health" > /dev/null 2>&1; then
        log_info "활성 배포(${active})가 정상입니다."
    else
        log_error "활성 배포(${active})가 비정상입니다!"
        exit 1
    fi
}
257
+
258
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
259
+ # Main
260
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
261
+
262
+ case "${1:-help}" in
263
+ deploy) cmd_deploy "${2:-latest}" ;;
264
+ rollback) cmd_rollback ;;
265
+ status) cmd_status ;;
266
+ health) cmd_health ;;
267
+ *)
268
+ echo "GovOn Blue/Green ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ"
269
+ echo ""
270
+ echo "์‚ฌ์šฉ๋ฒ•: $0 {deploy <tag>|rollback|status|health}"
271
+ echo ""
272
+ echo "๋ช…๋ น์–ด:"
273
+ echo " deploy <tag> ์ƒˆ ๋ฒ„์ „ ๋ฐฐํฌ (๊ธฐ๋ณธ๊ฐ’: latest)"
274
+ echo " rollback ์ด์ „ ๋ฒ„์ „์œผ๋กœ ๋กค๋ฐฑ"
275
+ echo " status ํ˜„์žฌ ๋ฐฐํฌ ์ƒํƒœ ํ™•์ธ"
276
+ echo " health ํ™œ์„ฑ ๋ฐฐํฌ ํ—ฌ์Šค์ฒดํฌ"
277
+ exit 1
278
+ ;;
279
+ esac
scripts/final_api_check.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import requests
4
+
5
+
6
+ def test_law():
7
+ oc = os.getenv("LAW_GO_KR_OC")
8
+ url = f"http://www.law.go.kr/DRF/lawSearch.do?target=law&query=๋ฏผ์›&type=XML&OC={oc}"
9
+ try:
10
+ res = requests.get(url, timeout=10)
11
+ print(f"[LAW] Status: {res.status_code}")
12
+ if "์‚ฌ์šฉ์ž ์ •๋ณด ๊ฒ€์ฆ์— ์‹คํŒจ" in res.text:
13
+ print("[LAW] โŒ IP ๋ฏธ์Šน์ธ ์ƒํƒœ (๋“ฑ๋กํ•œ IP๊ฐ€ ๋ฐ˜์˜๋˜์ง€ ์•Š์•˜๊ฑฐ๋‚˜ ๋‹ค๋ฆ„)")
14
+ elif "<law" in res.text:
15
+ print("[LAW] โœ… ์ธ์ฆ ์„ฑ๊ณต! ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ ๊ฐ€๋Šฅ")
16
+ else:
17
+ print(f"[LAW] โš ๏ธ ์‘๋‹ต ํ™•์ธ ํ•„์š” (๋‚ด์šฉ ์ผ๋ถ€): {res.text[:200]}")
18
+ except Exception as e:
19
+ print(f"[LAW] โŒ ์—๋Ÿฌ: {e}")
20
+
21
+
22
def test_alio():
    """Probe the data.go.kr ALIO institution-list API and print the outcome.

    Reads the ``DATA_GO_KR_API_KEY`` environment variable, requests a single
    row as JSON (10 second timeout) and prints a one-line diagnosis. Request
    failures and unparsable bodies are printed rather than raised. Returns
    ``None``.
    """
    key = os.getenv("DATA_GO_KR_API_KEY")
    if not key:
        # requests drops None-valued params, which would send a keyless call.
        print("[ALIO] ❌ DATA_GO_KR_API_KEY 환경변수가 설정되지 않았습니다")
        return

    # Pass the decoded key and leave the percent-encoding to requests.
    url = "https://apis.data.go.kr/1051000/public_inst/list"
    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException as e:
        print(f"[ALIO] ❌ 연결 에러: {e}")
        return

    print(f"[ALIO] Status: {res.status_code}")
    if res.status_code != 200:
        print(f"[ALIO] ❌ HTTP 오류: {res.status_code}")
        return
    if "SERVICE_KEY_IS_NOT_REGISTERED" in res.text:
        print("[ALIO] ❌ 키 미활성 상태 (동기화 대기 중)")
        return
    if "INVALID_REQUEST_PARAMETER_ERROR" in res.text:
        print("[ALIO] ❌ 파라미터 오류")
        return

    try:
        data = res.json()
        # Check the result code carried in the response header section.
        res_code = data.get("response", {}).get("header", {}).get("resultCode")
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON of an unexpected shape.
        print(f"[ALIO] ⚠️ 비정상 응답 (내용 일부): {res.text[:200]}")
        return

    if res_code == "00":
        print("[ALIO] ✅ 인증 성공! 데이터 수집 가능")
    else:
        print(f"[ALIO] ❌ 결과 오류 (코드: {res_code})")
50
+
51
+
52
if __name__ == "__main__":
    # Run both API probes in order, framed by two 50-character rules.
    print("-" * 50)
    print("🚀 API 최종 유효성 검사 시작")
    test_law()
    test_alio()
    print("-" * 50)
scripts/govon-bootstrap.sh ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # GovOn daemon bootstrap script
3
+ # Usage: ./scripts/govon-bootstrap.sh [start|stop|status|health]
4
+ #
5
+ # Environment variables:
6
+ # GOVON_HOME — GovOn home directory (default: ~/.govon)
7
+ # GOVON_PORT — daemon port (default: 8000)
8
+ # SKIP_MODEL_LOAD — skip loading the model (a warning is shown)
9
+
10
+ set -euo pipefail
11
+
12
+ PYTHON_CMD=""
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # ์„ค์ •
16
+ # ---------------------------------------------------------------------------
17
+ GOVON_HOME="${GOVON_HOME:-$HOME/.govon}"
18
+ GOVON_PORT="${GOVON_PORT:-8000}"
19
+ HEALTH_URL="http://127.0.0.1:${GOVON_PORT}/health"
20
+ PID_FILE="${GOVON_HOME}/daemon.pid"
21
+ LOG_FILE="${GOVON_HOME}/daemon.log"
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # ์ƒ‰์ƒ ์ถœ๋ ฅ ํ—ฌํผ
25
+ # ---------------------------------------------------------------------------
26
# Tagged message helpers. All arguments are joined with spaces and printed
# after a fixed tag; warnings and errors go to stderr, the rest to stdout.
_info() {
    printf '%s\n' "[INFO] $*"
}

_warn() {
    printf '%s\n' "[WARN] $*" >&2
}

_error() {
    printf '%s\n' "[ERROR] $*" >&2
}

_success() {
    printf '%s\n' "[OK] $*"
}
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Pre-flight ๊ฒ€์‚ฌ
33
+ # ---------------------------------------------------------------------------
34
+ _preflight_checks() {
35
+ # SKIP_MODEL_LOAD ๊ฒฝ๊ณ 
36
+ if [ "${SKIP_MODEL_LOAD:-}" = "true" ] || [ "${SKIP_MODEL_LOAD:-}" = "1" ]; then
37
+ _warn "SKIP_MODEL_LOAD๊ฐ€ ์„ค์ •๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค. ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์œผ๋ฉฐ ์ผ๋ถ€ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋ฉ๋‹ˆ๋‹ค."
38
+ fi
39
+
40
+ # GPU ๊ฐ์ง€ ๊ฒฝ๊ณ 
41
+ if command -v nvidia-smi &>/dev/null; then
42
+ if ! nvidia-smi &>/dev/null 2>&1; then
43
+ _warn "nvidia-smi ์‹คํ–‰์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค. GPU๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
44
+ fi
45
+ else
46
+ _warn "nvidia-smi๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. CPU ์ „์šฉ ๋ชจ๋“œ๋กœ ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค. (์„ฑ๋Šฅ์ด ํฌ๊ฒŒ ์ €ํ•˜๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)"
47
+ fi
48
+ }
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Python / govon ์„ค์น˜ ํ™•์ธ
52
+ # ---------------------------------------------------------------------------
53
+ _check_python() {
54
+ if ! command -v python3 &>/dev/null && ! command -v python &>/dev/null; then
55
+ _error "Python์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. Python 3.10 ์ด์ƒ์„ ์„ค์น˜ํ•˜์„ธ์š”."
56
+ exit 1
57
+ fi
58
+ PYTHON_CMD="$(command -v python3 || command -v python)"
59
+ _info "Python: $("$PYTHON_CMD" --version 2>&1)"
60
+ }
61
+
62
+ _check_govon() {
63
+ # govon CLI ๋˜๋Š” src.cli.shell ๋ชจ๋“ˆ ๊ฐ€์šฉ ์—ฌ๋ถ€ ํ™•์ธ
64
+ if command -v govon &>/dev/null; then
65
+ GOVON_CMD="govon"
66
+ _info "govon ๋ช…๋ น์–ด ๋ฐœ๊ฒฌ: $(command -v govon)"
67
+ elif $PYTHON_CMD -c "import src.cli.shell" 2>/dev/null; then
68
+ GOVON_CMD="$PYTHON_CMD -m src.cli.shell"
69
+ _info "govon ๋ชจ๋“ˆ(src.cli.shell) ๋ฐœ๊ฒฌ"
70
+ else
71
+ _error "govon์ด ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค. 'pip install govon[cli]' ๋˜๋Š” 'pip install -e .[cli]'๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”."
72
+ exit 1
73
+ fi
74
+ }
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # PID ์œ ํ‹ธ๋ฆฌํ‹ฐ
78
+ # ---------------------------------------------------------------------------
79
# Print the first whitespace-separated field of each line in $PID_FILE.
# Prints nothing (and succeeds) when the file does not exist.
_read_pid() {
    [ -f "$PID_FILE" ] || return 0
    awk '{print $1}' "$PID_FILE" 2>/dev/null || echo ""
}
84
+
85
# Succeed when $1 is non-empty and `kill -0` can signal that PID.
_pid_alive() {
    local candidate="$1"

    if [ -z "$candidate" ]; then
        return 1
    fi
    kill -0 "$candidate" 2>/dev/null
}
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # health ํ™•์ธ
92
+ # ---------------------------------------------------------------------------
93
# Quietly GET $HEALTH_URL (5 second cap); the exit status is curl's, so the
# function succeeds only when the request completes without an HTTP error.
_health_check() {
    curl --silent --fail --max-time 5 "$HEALTH_URL" >/dev/null 2>&1
}
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # ๋ช…๋ น: start
99
+ # ---------------------------------------------------------------------------
100
+ cmd_start() {
101
+ _preflight_checks
102
+ _check_python
103
+ _check_govon
104
+
105
+ # ์ด๋ฏธ ์‹คํ–‰ ์ค‘์ธ์ง€ ํ™•์ธ
106
+ local existing_pid
107
+ existing_pid="$(_read_pid)"
108
+ if _pid_alive "$existing_pid" && _health_check; then
109
+ _success "GovOn daemon์ด ์ด๋ฏธ ์‹คํ–‰ ์ค‘์ž…๋‹ˆ๋‹ค. (PID=$existing_pid, ํฌํŠธ=$GOVON_PORT)"
110
+ exit 0
111
+ fi
112
+
113
+ # ~/.govon ๋””๋ ‰ํ„ฐ๋ฆฌ ์ƒ์„ฑ
114
+ mkdir -p "$GOVON_HOME"
115
+
116
+ _info "GovOn daemon์„ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค. (ํฌํŠธ=$GOVON_PORT, ๋กœ๊ทธ=$LOG_FILE)"
117
+
118
+ # daemon ๊ธฐ๋™
119
+ if [ "$GOVON_CMD" = "govon" ]; then
120
+ # govon CLI๋ฅผ ํ†ตํ•œ ๊ธฐ๋™ (govon --start ์ง€์› ์‹œ ์‚ฌ์šฉ; ์—†์œผ๋ฉด ์ง์ ‘ uvicorn ํ˜ธ์ถœ)
121
+ if govon --help 2>&1 | grep -q -- "--start" 2>/dev/null; then
122
+ govon --start >> "$LOG_FILE" 2>&1 &
123
+ else
124
+ # ์ง์ ‘ uvicorn์œผ๋กœ ๊ธฐ๋™
125
+ $PYTHON_CMD -m uvicorn src.inference.api_server:app \
126
+ --host 127.0.0.1 \
127
+ --port "$GOVON_PORT" >> "$LOG_FILE" 2>&1 &
128
+ fi
129
+ else
130
+ $PYTHON_CMD -m uvicorn src.inference.api_server:app \
131
+ --host 127.0.0.1 \
132
+ --port "$GOVON_PORT" >> "$LOG_FILE" 2>&1 &
133
+ fi
134
+
135
+ local daemon_pid=$!
136
+ echo "$daemon_pid $(date +%s)" > "$PID_FILE"
137
+ _info "daemon PID=$daemon_pid ๊ธฐ๋ก ์™„๋ฃŒ."
138
+
139
+ # ๋น ๋ฅธ ์‹คํŒจ ๊ฐ์ง€: 2์ดˆ ํ›„ ํ”„๋กœ์„ธ์Šค๊ฐ€ ์ด๋ฏธ ์ข…๋ฃŒ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ
140
+ sleep 2
141
+ if ! kill -0 "$daemon_pid" 2>/dev/null; then
142
+ _error "daemon์ด ๊ธฐ๋™ ์งํ›„ ์ข…๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•˜์„ธ์š”: $LOG_FILE"
143
+ rm -f "$PID_FILE"
144
+ exit 1
145
+ fi
146
+
147
+ # health check ๋Œ€๊ธฐ (์ตœ๋Œ€ 120์ดˆ)
148
+ local elapsed=0
149
+ local max_wait=120
150
+ _info "health check ๋Œ€๊ธฐ ์ค‘..."
151
+ while [ $elapsed -lt $max_wait ]; do
152
+ if _health_check; then
153
+ _success "GovOn daemon ๊ธฐ๋™ ์™„๋ฃŒ. (PID=$daemon_pid, ํฌํŠธ=$GOVON_PORT)"
154
+ exit 0
155
+ fi
156
+ sleep 1
157
+ elapsed=$((elapsed + 1))
158
+ done
159
+
160
+ _error "health check timeout (${max_wait}s). ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•˜์„ธ์š”: $LOG_FILE"
161
+ exit 1
162
+ }
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # ๋ช…๋ น: stop
166
+ # ---------------------------------------------------------------------------
167
+ cmd_stop() {
168
+ local pid
169
+ pid="$(_read_pid)"
170
+
171
+ if [ -z "$pid" ]; then
172
+ _warn "PID ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค. daemon์ด ์‹คํ–‰ ์ค‘์ด ์•„๋‹Œ ๊ฒƒ์œผ๋กœ ๊ฐ„์ฃผํ•ฉ๋‹ˆ๋‹ค."
173
+ exit 0
174
+ fi
175
+
176
+ if ! _pid_alive "$pid"; then
177
+ _warn "PID=$pid ํ”„๋กœ์„ธ์Šค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. PID ํŒŒ์ผ์„ ์ œ๊ฑฐํ•ฉ๋‹ˆ๋‹ค."
178
+ rm -f "$PID_FILE"
179
+ exit 0
180
+ fi
181
+
182
+ # govon CLI --stop ์ง€์› ์—ฌ๋ถ€ ํ™•์ธ
183
+ if command -v govon &>/dev/null && govon --help 2>&1 | grep -q -- "--stop" 2>/dev/null; then
184
+ govon --stop
185
+ else
186
+ _info "SIGTERM ์ „์†ก: PID=$pid"
187
+ kill -TERM "$pid"
188
+
189
+ local elapsed=0
190
+ while [ $elapsed -lt 10 ]; do
191
+ if ! _pid_alive "$pid"; then
192
+ _success "GovOn daemon์ด ์ •์ƒ ์ข…๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. (PID=$pid)"
193
+ rm -f "$PID_FILE"
194
+ exit 0
195
+ fi
196
+ sleep 1
197
+ elapsed=$((elapsed + 1))
198
+ done
199
+
200
+ _warn "timeout โ€” SIGKILL ์ „์†ก: PID=$pid"
201
+ kill -KILL "$pid" 2>/dev/null || true
202
+ rm -f "$PID_FILE"
203
+ _success "GovOn daemon์ด ๊ฐ•์ œ ์ข…๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. (PID=$pid)"
204
+ fi
205
+ }
206
+
207
+ # ---------------------------------------------------------------------------
208
+ # ๋ช…๋ น: status
209
+ # ---------------------------------------------------------------------------
210
+ cmd_status() {
211
+ local pid
212
+ pid="$(_read_pid)"
213
+
214
+ if [ -z "$pid" ]; then
215
+ echo "GovOn daemon: ์ค‘์ง€๋จ (PID ํŒŒ์ผ ์—†์Œ)"
216
+ exit 1
217
+ fi
218
+
219
+ if ! _pid_alive "$pid"; then
220
+ echo "GovOn daemon: ์ค‘์ง€๋จ (PID=$pid โ€” ํ”„๋กœ์„ธ์Šค ์—†์Œ)"
221
+ rm -f "$PID_FILE"
222
+ exit 1
223
+ fi
224
+
225
+ if _health_check; then
226
+ echo "GovOn daemon: ์‹คํ–‰ ์ค‘ (PID=$pid, ํฌํŠธ=$GOVON_PORT)"
227
+ exit 0
228
+ else
229
+ echo "GovOn daemon: ํ”„๋กœ์„ธ์Šค๋Š” ์‚ด์•„ ์žˆ์ง€๋งŒ health check ์‹คํŒจ (PID=$pid, URL=$HEALTH_URL)"
230
+ exit 2
231
+ fi
232
+ }
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # ๋ช…๋ น: health
236
+ # ---------------------------------------------------------------------------
237
+ cmd_health() {
238
+ _info "GET $HEALTH_URL"
239
+ if curl -sf --max-time 10 "$HEALTH_URL"; then
240
+ echo ""
241
+ _success "health check ํ†ต๊ณผ."
242
+ exit 0
243
+ else
244
+ _error "health check ์‹คํŒจ. daemon์ด ์‹คํ–‰ ์ค‘์ธ์ง€ ํ™•์ธํ•˜์„ธ์š”."
245
+ exit 1
246
+ fi
247
+ }
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # ์ง„์ž…์ 
251
+ # ---------------------------------------------------------------------------
252
+ COMMAND="${1:-help}"
253
+
254
+ case "$COMMAND" in
255
+ start)
256
+ cmd_start
257
+ ;;
258
+ stop)
259
+ cmd_stop
260
+ ;;
261
+ status)
262
+ cmd_status
263
+ ;;
264
+ health)
265
+ cmd_health
266
+ ;;
267
+ help|--help|-h)
268
+ echo "์‚ฌ์šฉ๋ฒ•: $0 [start|stop|status|health]"
269
+ echo ""
270
+ echo "๋ช…๋ น์–ด:"
271
+ echo " start โ€” GovOn daemon์„ ๊ธฐ๋™ํ•ฉ๋‹ˆ๋‹ค"
272
+ echo " stop โ€” GovOn daemon์„ ์ค‘์ง€ํ•ฉ๋‹ˆ๋‹ค"
273
+ echo " status โ€” daemon ์‹คํ–‰ ์ƒํƒœ๋ฅผ ํ™•์ธํ•ฉ๋‹ˆ๋‹ค"
274
+ echo " health โ€” /health ์—”๋“œํฌ์ธํŠธ๋ฅผ probeํ•ฉ๋‹ˆ๋‹ค"
275
+ echo ""
276
+ echo "ํ™˜๊ฒฝ๋ณ€์ˆ˜:"
277
+ echo " GOVON_HOME=$GOVON_HOME"
278
+ echo " GOVON_PORT=$GOVON_PORT"
279
+ echo " SKIP_MODEL_LOAD (์„ค์ • ์‹œ ๊ฒฝ๊ณ  ํ‘œ์‹œ)"
280
+ exit 0
281
+ ;;
282
+ *)
283
+ _error "์•Œ ์ˆ˜ ์—†๋Š” ๋ช…๋ น: $COMMAND"
284
+ echo "์‚ฌ์šฉ๋ฒ•: $0 [start|stop|status|health]"
285
+ exit 1
286
+ ;;
287
+ esac
scripts/offline-deploy.sh ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5
+ PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
6
+ IMAGE_FILE="${PROJECT_DIR}/govon-image.tar.gz"
7
+ ENV_TEMPLATE="${PROJECT_DIR}/.env.airgap.example"
8
+ ENV_FILE="${PROJECT_DIR}/.env"
9
+ API_KEY_PLACEHOLDER="CHANGE_ME_TO_SECURE_RANDOM_KEY"
10
+ BM25_INDEX_HMAC_KEY_PLACEHOLDER="CHANGE_ME_TO_SECURE_HMAC_KEY"
11
+
12
# Print the value of the first `KEY=value` line for a key in an env file.
#
# Arguments:
#   $1  variable name to look up
#   $2  path of the env file
# Output:
#   The text after the first "=", with a trailing carriage return, the
#   surrounding whitespace and one pair of matching surrounding quotes
#   removed. Nothing is printed when the key is absent.
#
# Normalising the value matters for the placeholder check built on top of
# this helper: a quoted or CRLF-terminated placeholder would otherwise not
# compare equal to the bare placeholder text.
extract_env_value() {
    local key="$1"
    local file="$2"

    awk -v key="$key" '
        $0 ~ "^[[:space:]]*" key "=" {
            value = $0
            sub(/^[^=]*=/, "", value)
            sub(/\r$/, "", value)
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", value)
            # \047 is the single-quote character (cannot be written literally
            # inside this single-quoted awk program).
            if (value ~ /^".*"$/ || value ~ /^\047.*\047$/) {
                value = substr(value, 2, length(value) - 2)
            }
            print value
            exit
        }
    ' "$file"
}
24
+
25
+ require_secure_env_value() {
26
+ local key="$1"
27
+ local placeholder="$2"
28
+ local value
29
+
30
+ value="$(extract_env_value "$key" "$ENV_FILE")"
31
+ if [ -z "$value" ] || [ "$value" = "$placeholder" ]; then
32
+ echo "[ERROR] ${key} ๊ฐ’์ด ๋น„์–ด ์žˆ๊ฑฐ๋‚˜ ์˜ˆ์‹œ placeholder ๊ทธ๋Œ€๋กœ์ž…๋‹ˆ๋‹ค."
33
+ echo " ${ENV_FILE}์—์„œ ${key}๋ฅผ ์•ˆ์ „ํ•œ ์ž„์˜ ๋ฌธ์ž์—ด๋กœ ์ˆ˜์ •ํ•œ ๋’ค ๋‹ค์‹œ ์‹คํ–‰ํ•˜์„ธ์š”."
34
+ exit 1
35
+ fi
36
+ }
37
+
38
+ echo "=== GovOn ์˜คํ”„๋ผ์ธ ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ ==="
39
+
40
+ # 1. Docker ์„ค์น˜ ํ™•์ธ
41
+ if ! command -v docker &>/dev/null; then
42
+ echo "[ERROR] Docker๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."
43
+ echo "์„ค์น˜ ๊ฐ€์ด๋“œ: https://docs.docker.com/engine/install/"
44
+ exit 1
45
+ fi
46
+ echo "[OK] Docker: $(docker --version)"
47
+
48
+ # 2. Docker Compose ํ™•์ธ
49
+ if ! docker compose version &>/dev/null; then
50
+ echo "[ERROR] Docker Compose๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."
51
+ exit 1
52
+ fi
53
+ echo "[OK] Docker Compose: $(docker compose version --short)"
54
+
55
+ # 3. NVIDIA Container Toolkit ํ™•์ธ (๊ฒฝ๊ณ ๋งŒ)
56
+ if docker info 2>/dev/null | grep -q "Runtimes.*nvidia"; then
57
+ echo "[OK] NVIDIA Container Toolkit ๊ฐ์ง€๋จ"
58
+ else
59
+ echo "[WARNING] NVIDIA Container Toolkit์ด ๊ฐ์ง€๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
60
+ echo "GPU ๊ฐ€์†์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
61
+ fi
62
+
63
+ # 4. ์ด๋ฏธ์ง€ ํŒŒ์ผ ํ™•์ธ ๋ฐ ๋กœ๋“œ
64
+ if [ ! -f "$IMAGE_FILE" ]; then
65
+ echo "[ERROR] ์ด๋ฏธ์ง€ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: $IMAGE_FILE"
66
+ exit 1
67
+ fi
68
+ echo "Docker ์ด๋ฏธ์ง€ ๋กœ๋“œ ์ค‘... (์‹œ๊ฐ„์ด ์†Œ์š”๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)"
69
+ gunzip -c "$IMAGE_FILE" | docker load
70
+ echo "[OK] ์ด๋ฏธ์ง€ ๋กœ๋“œ ์™„๋ฃŒ"
71
+
72
+ # 5. ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํ…œํ”Œ๋ฆฟ ์ค€๋น„
73
+ if [ ! -f "$ENV_FILE" ] && [ -f "$ENV_TEMPLATE" ]; then
74
+ cp "$ENV_TEMPLATE" "$ENV_FILE"
75
+ echo "[OK] .env ํŒŒ์ผ์„ .env.airgap.example ๊ธฐ์ค€์œผ๋กœ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค."
76
+ echo " API_KEY, BM25_INDEX_HMAC_KEY, CORS_ORIGINS ๋“ฑ์„ ์ˆ˜์ •ํ•œ ๋’ค ์žฌ์‹คํ–‰ํ•˜์„ธ์š”."
77
+ fi
78
+
79
+ if [ -z "${MODEL_PATH:-}" ] && [ ! -f "$ENV_FILE" ]; then
80
+ echo "[INFO] MODEL_PATH๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
81
+ echo " ์˜คํ”„๋ผ์ธ ํ™˜๊ฒฝ์—์„œ๋Š” ์ปจํ…Œ์ด๋„ˆ ๋‚ด๋ถ€ ๊ฒฝ๋กœ๋ฅผ ์ง€์ •ํ•˜์„ธ์š”:"
82
+ echo " export MODEL_PATH=/app/models/EXAONE-4.0-32B-AWQ"
83
+ fi
84
+
85
+ if [ ! -f "$ENV_FILE" ]; then
86
+ echo "[ERROR] ํ™˜๊ฒฝ๋ณ€์ˆ˜ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: $ENV_FILE"
87
+ exit 1
88
+ fi
89
+
90
+ require_secure_env_value "API_KEY" "$API_KEY_PLACEHOLDER"
91
+ require_secure_env_value "BM25_INDEX_HMAC_KEY" "$BM25_INDEX_HMAC_KEY_PLACEHOLDER"
92
+
93
+ # 6. ๋ณผ๋ฅจ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
94
+ echo "๋ณผ๋ฅจ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ ์ค‘..."
95
+ mkdir -p \
96
+ "${PROJECT_DIR}/models" \
97
+ "${PROJECT_DIR}/data" \
98
+ "${PROJECT_DIR}/agents" \
99
+ "${PROJECT_DIR}/configs" \
100
+ "${PROJECT_DIR}/logs" \
101
+ "${PROJECT_DIR}/.cache"
102
+ echo "[OK] ๋ณผ๋ฅจ ๋””๋ ‰ํ† ๋ฆฌ ์ค€๋น„ ์™„๋ฃŒ"
103
+
104
+ # 7. ์ปจํ…Œ์ด๋„ˆ ์‹คํ–‰
105
+ echo "์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘ ์ค‘..."
106
+ docker compose --env-file "${ENV_FILE}" -f "${PROJECT_DIR}/docker-compose.offline.yml" up -d
107
+ echo "[OK] ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘๋จ"
108
+
109
+ # 8. ํ—ฌ์Šค์ฒดํฌ ๋Œ€๊ธฐ
110
+ echo "์„œ๋ฒ„ ์‹œ์ž‘ ๋Œ€๊ธฐ ์ค‘... (์ตœ๋Œ€ 120์ดˆ)"
111
+ for i in $(seq 1 24); do
112
+ if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
113
+ echo ""
114
+ echo "=============================="
115
+ echo "[SUCCESS] GovOn ์„œ๋ฒ„๊ฐ€ ์ •์ƒ ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค!"
116
+ echo "API ์ฃผ์†Œ: http://localhost:8000"
117
+ echo "ํ—ฌ์Šค์ฒดํฌ: http://localhost:8000/health"
118
+ echo "=============================="
119
+ exit 0
120
+ fi
121
+ printf "."
122
+ sleep 5
123
+ done
124
+
125
+ echo ""
126
+ echo "[ERROR] ์„œ๋ฒ„ ์‹œ์ž‘ ์‹คํŒจ (120์ดˆ ํƒ€์ž„์•„์›ƒ)"
127
+ echo "๋กœ๊ทธ ํ™•์ธ: docker compose --env-file ${ENV_FILE} -f ${PROJECT_DIR}/docker-compose.offline.yml logs"
128
+ exit 1
scripts/smoke-test.sh ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Smoke test for a running GovOn server.
#
# Usage: ./scripts/smoke-test.sh [BASE_URL]      (default: http://localhost:8000)
# Env:   CURL_MAX_TIME  upper bound in seconds for the HTTP probe (default: 10)
# Exit:  0 when every check passes, 1 otherwise.
set -euo pipefail

BASE_URL="${1:-http://localhost:8000}"
# Bound the probe so an unresponsive server cannot stall the script forever.
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"

echo "=== GovOn Smoke Test ==="
echo "๋Œ€์ƒ: $BASE_URL"
echo ""

PASS=0
FAIL=0

# Test 1: Health check
# Every path below prints exactly one PASS/FAIL so the "[TEST] ..." line is
# never left dangling (previously an empty 200 body produced no verdict).
echo -n "[TEST] GET /health ... "
HEALTH_RESPONSE=""
if ! HEALTH_RESPONSE=$(curl -sf --max-time "$CURL_MAX_TIME" "${BASE_URL}/health" 2>/dev/null); then
  echo "FAIL (์—ฐ๊ฒฐ ์‹คํŒจ)"
  FAIL=$((FAIL+1))
else
  # A body that is not JSON yields an empty STATUS and is reported as "unknown".
  STATUS=$(echo "$HEALTH_RESPONSE" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
  if [ "$STATUS" = "healthy" ]; then
    echo "PASS"
    PASS=$((PASS+1))
  else
    echo "FAIL (status: ${STATUS:-unknown})"
    FAIL=$((FAIL+1))
  fi
fi

# Test 2: Health response structure
echo -n "[TEST] /health ์‘๋‹ต ๊ตฌ์กฐ ... "
if echo "$HEALTH_RESPONSE" | python3 -c "import sys,json; d=json.load(sys.stdin); assert 'status' in d" 2>/dev/null; then
  echo "PASS"
  PASS=$((PASS+1))
else
  echo "FAIL"
  FAIL=$((FAIL+1))
fi

echo ""
echo "=============================="
echo "๊ฒฐ๊ณผ: PASS=${PASS}, FAIL=${FAIL}"
if [ "$FAIL" -gt 0 ]; then
  echo "์ƒํƒœ: FAILED"
  exit 1
else
  echo "์ƒํƒœ: PASSED"
  exit 0
fi
scripts/test_alio_only.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import requests
4
+
5
+
6
def test_alio_api():
    """Check that DATA_GO_KR_API_KEY is accepted by the ALIO public-institution API.

    Sends one minimal list request and prints a human-readable verdict.

    Returns:
        bool: True when the API answers HTTP 200 with resultCode "00",
        False for a missing key, an HTTP/transport error, or any other response.
    """
    key = os.getenv("DATA_GO_KR_API_KEY")
    if not key:
        # Without this guard requests drops the None parameter and the run is
        # misreported as an API-side authentication failure.
        print("DATA_GO_KR_API_KEY environment variable is not set")
        return False

    # The decoded (raw) key is passed as-is so that requests performs the URL encoding.
    url = "https://apis.data.go.kr/1051000/public_inst/list"
    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
    try:
        res = requests.get(url, params=params, timeout=10)
    except requests.RequestException as e:
        print(f"โŒ ์—ฐ๊ฒฐ ์‹คํŒจ: {e}")
        return False

    print(f"HTTP Status: {res.status_code}")
    if res.status_code != 200:
        print(f"โŒ HTTP ์š”์ฒญ ์‹คํŒจ (Status: {res.status_code})")
        return False

    # The gateway reports a bad key as an XML error document with HTTP 200.
    if "<ServiceKey Error" in res.text:
        print("โŒ ์ธ์ฆํ‚ค ์˜ค๋ฅ˜ (ServiceKey Error)")
        return False

    try:
        data = res.json()
        header = data.get("response", {}).get("header", {})
        code = header.get("resultCode")
        msg = header.get("resultMsg")
    except Exception:
        # Best effort: any non-JSON or unexpectedly shaped body counts as a failure.
        print(f"โš ๏ธ JSON ํŒŒ์‹ฑ ์‹คํŒจ ๋˜๋Š” ๋น„์ •์ƒ ์‘๋‹ต: {res.text[:200]}")
        return False

    if code == "00":
        print(f"โœ… ALIO API ์œ ํšจํ•จ! (๊ฒฐ๊ณผ์ฝ”๋“œ: {code})")
        return True
    print(f"โŒ ์ธ์ฆ ์˜ค๋ฅ˜ ๋ฐœ์ƒ (์ฝ”๋“œ: {code}, ๋ฉ”์‹œ์ง€: {msg})")
    return False


if __name__ == "__main__":
    # Propagate the verdict as the process exit code so callers/CI can detect failure.
    raise SystemExit(0 if test_alio_api() else 1)
scripts/test_api_keys.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ import requests
5
+
6
+
7
def test_law_api():
    """Check that LAW_GO_KR_OC is accepted by the law.go.kr search API.

    Sends one law search request and prints a human-readable verdict.

    Returns:
        bool: True when the API answers HTTP 200 with a body containing "<law",
        False for a missing OC value, a transport error, or any other response.
    """
    oc = os.getenv("LAW_GO_KR_OC")
    if not oc:
        # requests silently omits None parameters, which would otherwise be
        # reported as a server-side error instead of a local configuration issue.
        print("[LAW API] LAW_GO_KR_OC environment variable is not set")
        return False

    url = "http://www.law.go.kr/DRF/lawSearch.do"
    params = {"target": "law", "query": "๋ฏผ์›", "type": "XML", "OC": oc}
    try:
        res = requests.get(url, params=params, timeout=10)
    except Exception as e:
        print(f"[LAW API] โŒ ์—ฐ๊ฒฐ ์‹คํŒจ: {e}")
        return False

    if res.status_code == 200 and "<law" in res.text:
        print("[LAW API] โœ… ์œ ํšจํ•จ (์ƒํƒœ์ฝ”๋“œ: 200)")
        return True

    print(f"[LAW API] โŒ ์˜ค๋ฅ˜ (์ƒํƒœ์ฝ”๋“œ: {res.status_code})")
    print(f"์‘๋‹ต๋‚ด์šฉ ์ผ๋ถ€: {res.text[:200]}")
    return False
23
+
24
+
25
def test_alio_api():
    """Check that DATA_GO_KR_API_KEY is accepted by the ALIO public-institution API.

    Returns:
        bool: True when the API answers HTTP 200 with resultCode "00",
        False for a missing key, an HTTP/transport error, or any other response.
    """
    key = os.getenv("DATA_GO_KR_API_KEY")
    if not key:
        # Fail fast with a clear message instead of sending a key-less request.
        print("[ALIO API] DATA_GO_KR_API_KEY environment variable is not set")
        return False

    # The decoded (raw) key is used, so requests is left to URL-encode it once.
    url = "https://apis.data.go.kr/1051000/public_inst/list"
    params = {"serviceKey": key, "pageNo": 1, "numOfRows": 1, "resultType": "json"}
    try:
        res = requests.get(url, params=params, timeout=10)
    except Exception as e:
        print(f"[ALIO API] โŒ ์—ฐ๊ฒฐ ์‹คํŒจ: {e}")
        return False

    if res.status_code != 200:
        print(f"[ALIO API] โŒ HTTP ์˜ค๋ฅ˜ (์ƒํƒœ์ฝ”๋“œ: {res.status_code})")
        return False

    try:
        data = res.json()
        header = data.get("response", {}).get("header", {})
        code = header.get("resultCode")
        msg = header.get("resultMsg", "์•Œ ์ˆ˜ ์—†๋Š” ์˜ค๋ฅ˜")
    except Exception:
        # Non-JSON body: the gateway reports a bad key as an XML error document.
        if "<ServiceKey Error" in res.text:
            print("[ALIO API] โŒ ์ธ์ฆํ‚ค ์˜ค๋ฅ˜ (ServiceKey Error)")
        else:
            print(f"[ALIO API] โŒ ๋น„์ •์ƒ ์‘๋‹ต: {res.text[:200]}")
        return False

    if code == "00":
        print("[ALIO API] โœ… ์œ ํšจํ•จ (์ƒํƒœ์ฝ”๋“œ: 200, ๊ฒฐ๊ณผ์ฝ”๋“œ: 00)")
        return True
    print(f"[ALIO API] โŒ ์ธ์ฆ ์˜ค๋ฅ˜ (๊ฒฐ๊ณผ์ฝ”๋“œ: {code}, ๋ฉ”์‹œ์ง€: {msg})")
    return False
59
+
60
+
61
if __name__ == "__main__":
    # Run both key checks (always both, in this order), then exit non-zero
    # when at least one of them did not succeed.
    divider = "-" * 50
    print(divider)
    print("๐Ÿš€ API ํ‚ค ์œ ํšจ์„ฑ ๊ฒ€์‚ฌ ์‹œ์ž‘")
    outcomes = [test_law_api(), test_alio_api()]
    print(divider)
    if not all(outcomes):
        print("โš ๏ธ ์ผ๋ถ€ API ํ‚ค์— ํ™•์ธ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.")
        sys.exit(1)
    print("โœจ ๋ชจ๋“  API ํ‚ค๊ฐ€ ์ •์ƒ์ ์œผ๋กœ ์ž‘๋™ํ•ฉ๋‹ˆ๋‹ค!")
scripts/test_law_https.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import requests
4
+
5
+
6
def test_law_https():
    """Probe the law.go.kr search API over HTTPS using LAW_GO_KR_OC.

    Prints the requested URL, the HTTP status and a verdict derived from the body.

    Returns:
        bool: True when the body contains "<law"; False when OC is not set,
        the request fails, or the body indicates anything else.
    """
    oc = os.getenv("LAW_GO_KR_OC")
    if not oc:
        # Previously an unset variable was sent as the literal text "OC=None".
        print("LAW_GO_KR_OC environment variable is not set")
        return False

    # Try the HTTPS endpoint.
    url = f"https://www.law.go.kr/DRF/lawSearch.do?target=law&query=๋ฏผ์›&type=XML&OC={oc}"
    try:
        res = requests.get(url, timeout=15)
    except Exception as e:
        print(f"โŒ ์—๋Ÿฌ: {e}")
        return False

    print(f"URL: {url}")
    print(f"Status: {res.status_code}")
    if "์‚ฌ์šฉ์ž ์ •๋ณด ๊ฒ€์ฆ์— ์‹คํŒจ" in res.text:
        print("โŒ HTTPS๋กœ๋„ IP ์ธ์ฆ ์‹คํŒจ")
        return False
    if "<law" in res.text:
        print("โœ… HTTPS ํ˜ธ์ถœ ์„ฑ๊ณต!")
        return True
    print(f"โš ๏ธ ์‘๋‹ต ํ™•์ธ ํ•„์š”: {res.text[:200]}")
    return False


if __name__ == "__main__":
    # Surface the verdict through the exit code, like scripts/test_api_keys.py does.
    raise SystemExit(0 if test_law_https() else 1)
scripts/verify_e2e_tool_calling.py ADDED
@@ -0,0 +1,1645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """GovOn Native Tool Calling + AdapterRegistry E2E ๊ฒ€์ฆ ์Šคํฌ๋ฆฝํŠธ.
3
+
4
+ HuggingFace Space์— ๋ฐฐํฌ๋œ govon-runtime ์„œ๋ฒ„์— ๋Œ€ํ•ด
5
+ ์—์ด์ „ํŠธ ํŒŒ์ดํ”„๋ผ์ธ(ํ”Œ๋ž˜๋„ˆ โ†’ ๋„๊ตฌ ์‹คํ–‰ โ†’ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜)์„ ๊ฒ€์ฆํ•œ๋‹ค.
6
+
7
+ ์‚ฌ์šฉ๋ฒ•:
8
+ GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_e2e_tool_calling.py
9
+ GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_e2e_tool_calling.py
10
+
11
+ 5-Phase ๊ฒ€์ฆ (13 ์‹œ๋‚˜๋ฆฌ์˜ค):
12
+ Phase 1: Infrastructure (hard gate)
13
+ 1. Health & Profile
14
+ 2. Base Model Generation
15
+ 3. Adapter Registry
16
+ Phase 2: Agent Pipeline Core
17
+ 4. Planner Produces Valid Plan
18
+ 5. Civil LoRA Draft Response
19
+ 6. Legal LoRA Evidence Augmentation (depends on 5)
20
+ 7. Task Type Classification
21
+ Phase 3: data.go.kr API Tools (soft gate)
22
+ 8. External API Tool Invocation (4 sub-cases)
23
+ Phase 4: Adapter Dynamics
24
+ 9. Sequential Adapter Switching
25
+ 10. LoRA ID Consistency
26
+ Phase 5: Robustness
27
+ 11. Empty Query Handling
28
+ 12. Reject Flow Completeness
29
+ 13. Concurrent Request Isolation
30
+ """
31
+
32
+ # stdlib
33
+ import asyncio
34
+ import json
35
+ import logging
36
+ import os
37
+ import re
38
+ import sys
39
+ import time
40
+ from typing import Any, Optional
41
+ from uuid import uuid4
42
+
43
# Runtime endpoint; any trailing slash is stripped so request paths can be appended directly.
BASE_URL = os.getenv("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
# Optional API key; request headers only include it when it is set.
API_KEY = os.getenv("API_KEY")
TIMEOUT = 300  # maximum wait per scenario, in seconds
BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
RESULTS_PATH = "verify_e2e_tool_calling_results.json"

# Whitelist of tool names the planner is allowed to emit.
VALID_TOOLS = frozenset(
    (
        "rag_search",
        "api_lookup",
        "draft_civil_response",
        "append_evidence",
        "issue_detector",
        "stats_lookup",
        "keyword_analyzer",
        "demographics_lookup",
    )
)
61
+
62
+ LEGAL_PATTERNS = [
63
+ r"์ œ\s*\d+\s*์กฐ",
64
+ r"์ œ\s*\d+\s*ํ•ญ",
65
+ r"๋ฒ•๋ฅ ",
66
+ r"์‹œํ–‰๋ น",
67
+ r"์กฐ๋ก€",
68
+ r"ํŒ๋ก€",
69
+ r"๋Œ€๋ฒ•์›",
70
+ r"๋ฒ•",
71
+ r"๋ น",
72
+ r"๊ทœ์ •",
73
+ ]
74
+
75
+ logger = logging.getLogger(__name__)
76
+
77
+ _results: list[dict] = []
78
+ _observed_tools: set[str] = set()
79
+ _run_id = uuid4().hex
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # HTTP client layer (httpx preferred, urllib fallback)
84
+ # ---------------------------------------------------------------------------
85
+
86
def _build_headers() -> dict:
    """Return the default JSON request headers, adding ``X-API-Key`` when API_KEY is set."""
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    if API_KEY:
        headers["X-API-Key"] = API_KEY
    return headers


def _sse_headers() -> dict:
    """Return the default headers with ``Accept`` switched to ``text/event-stream``."""
    headers = _build_headers()
    headers["Accept"] = "text/event-stream"
    return headers


def _parse_sse_line(line: str) -> Optional[dict]:
    """Decode one SSE line into an event dict.

    Returns None for lines that are not ``data:`` lines or carry an empty payload.
    Payloads that are not valid JSON, or are valid JSON but not an object, are
    wrapped as ``{"_raw": payload}`` so callers can always use ``.get`` on events.
    """
    line = line.strip()
    if not line.startswith("data:"):
        return None
    payload = line[len("data:") :].strip()
    if not payload:
        return None
    try:
        event = json.loads(payload)
    except json.JSONDecodeError:
        return {"_raw": payload}
    return event if isinstance(event, dict) else {"_raw": payload}


try:
    import httpx

    _HTTP_BACKEND = "httpx"

    def _httpx_json(resp) -> dict:
        """Return the decoded JSON body, or ``{"_raw": <first 200 chars>}`` when it is not JSON."""
        try:
            return resp.json()
        except ValueError:  # includes json.JSONDecodeError and UnicodeDecodeError
            return {"_raw": resp.text[:200]}

    async def http_get(path: str, timeout: float = TIMEOUT) -> tuple[int, dict]:
        """GET ``BASE_URL + path``; return ``(status_code, decoded_body)``."""
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.get(BASE_URL + path, headers=_build_headers())
            return resp.status_code, _httpx_json(resp)

    async def http_post(path: str, body: dict, timeout: float = TIMEOUT) -> tuple[int, dict]:
        """POST *body* as JSON to ``BASE_URL + path``; return ``(status_code, decoded_body)``."""
        async with httpx.AsyncClient(timeout=timeout) as client:
            resp = await client.post(BASE_URL + path, json=body, headers=_build_headers())
            return resp.status_code, _httpx_json(resp)

    async def http_post_sse(
        path: str, body: dict, timeout: float = TIMEOUT
    ) -> tuple[int, list[dict]]:
        """Streaming SSE POST: collect the ``data:`` chunks and return the parsed event list."""
        events: list[dict] = []
        status_code = 0
        async with httpx.AsyncClient(timeout=timeout) as client:
            async with client.stream(
                "POST", BASE_URL + path, json=body, headers=_sse_headers()
            ) as resp:
                status_code = resp.status_code
                async for line in resp.aiter_lines():
                    event = _parse_sse_line(line)
                    if event is not None:
                        events.append(event)
        return status_code, events

    async def http_get_raw(url: str, timeout: float = 10) -> tuple[int, str]:
        """Raw GET for external connectivity checks.

        Returns ``(status_code, first 200 chars of the body)``; transport failures
        yield ``(0, "")`` so both HTTP backends report unreachable hosts the same way.
        """
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                resp = await client.get(url)
                return resp.status_code, resp.text[:200]
        except (httpx.HTTPError, httpx.InvalidURL):
            return 0, ""

except ImportError:
    import urllib.error
    import urllib.request

    _HTTP_BACKEND = "urllib"

    def _decode_body(raw: bytes) -> dict:
        """Decode a response body as JSON, or wrap it as ``{"_raw": <first 200 chars>}``."""
        text = raw.decode("utf-8", errors="replace")
        try:
            return json.loads(text)
        except ValueError:
            return {"_raw": text[:200]}

    def _blocking_json_request(
        method: str, path: str, body: Optional[dict], timeout: float
    ) -> tuple[int, dict]:
        """Blocking JSON request; HTTP error bodies are decoded rather than discarded."""
        data = None if body is None else json.dumps(body).encode()
        req = urllib.request.Request(
            BASE_URL + path, data=data, headers=_build_headers(), method=method
        )
        try:
            with urllib.request.urlopen(req, timeout=timeout) as r:
                return r.status, _decode_body(r.read())
        except urllib.error.HTTPError as e:
            with e:
                return e.code, _decode_body(e.read())

    def _blocking_sse_request(path: str, body: dict, timeout: float) -> tuple[int, list[dict]]:
        """Blocking SSE POST; reads the stream line by line until the server closes it."""
        req = urllib.request.Request(
            BASE_URL + path,
            data=json.dumps(body).encode(),
            headers=_sse_headers(),
            method="POST",
        )
        events: list[dict] = []
        status_code = 0
        try:
            with urllib.request.urlopen(req, timeout=timeout) as r:
                status_code = r.status
                for raw_line in r:
                    event = _parse_sse_line(raw_line.decode("utf-8", errors="replace"))
                    if event is not None:
                        events.append(event)
        except urllib.error.HTTPError as e:
            status_code = e.code
        return status_code, events

    def _blocking_get_raw(url: str, timeout: float) -> tuple[int, str]:
        """Blocking raw GET; any failure other than an HTTP error status yields ``(0, "")``."""
        req = urllib.request.Request(url, method="GET")
        try:
            with urllib.request.urlopen(req, timeout=timeout) as r:
                return r.status, r.read().decode()[:200]
        except urllib.error.HTTPError as e:
            return e.code, ""
        except Exception:
            return 0, ""

    # urllib is synchronous: each call runs in a worker thread so the event loop
    # stays free and concurrent scenarios are not serialised behind one request.

    async def http_get(path: str, timeout: float = TIMEOUT) -> tuple[int, dict]:
        """GET ``BASE_URL + path``; return ``(status_code, decoded_body)``."""
        return await asyncio.to_thread(_blocking_json_request, "GET", path, None, timeout)

    async def http_post(path: str, body: dict, timeout: float = TIMEOUT) -> tuple[int, dict]:
        """POST *body* as JSON to ``BASE_URL + path``; return ``(status_code, decoded_body)``."""
        return await asyncio.to_thread(_blocking_json_request, "POST", path, body, timeout)

    async def http_post_sse(
        path: str, body: dict, timeout: float = TIMEOUT
    ) -> tuple[int, list[dict]]:
        """urllib fallback: read the SSE stream synchronously (in a worker thread)."""
        return await asyncio.to_thread(_blocking_sse_request, path, body, timeout)

    async def http_get_raw(url: str, timeout: float = 10) -> tuple[int, str]:
        """Raw GET for external connectivity checks; ``(0, "")`` when the host is unreachable."""
        return await asyncio.to_thread(_blocking_get_raw, url, timeout)
216
+
217
+
218
+ # ---------------------------------------------------------------------------
219
+ # Result recording / output helpers
220
+ # ---------------------------------------------------------------------------
221
+
222
+
223
def _record(
    scenario_num: int,
    name: str,
    phase: int,
    status: str,
    elapsed: float,
    attempts: int = 1,
    assertions: Optional[list[str]] = None,
    warnings: Optional[list[str]] = None,
    error: Optional[str] = None,
    detail: Optional[Any] = None,
) -> dict:
    """Print a one-line verdict for a scenario and append its result entry to ``_results``.

    Args:
        scenario_num: Scenario number shown in the output and stored as ``id``.
        name: Human-readable scenario name.
        phase: Phase number the scenario belongs to.
        status: ``"passed"``, ``"failed"`` or ``"skipped"``; anything else is tagged ``[????]``.
        elapsed: Duration in seconds (stored rounded to 3 decimals).
        attempts: Number of attempts that were made.
        assertions: Checks performed; stored as ``[]`` when omitted.
        warnings: Non-fatal notes; each is printed on its own ``[WARN]`` line.
        error: Failure or skip reason.
        detail: Arbitrary extra payload stored with the entry.

    Returns:
        The entry dict that was appended to ``_results``.
    """
    labels = {"passed": "[PASS]", "failed": "[FAIL]", "skipped": "[SKIP]"}
    label = labels.get(status, "[????]")
    timing = f"({elapsed:.2f}s)"
    headline = f"{label} Scenario {scenario_num}: {name}"

    if status == "passed":
        print(f"{headline} {timing}")
    else:
        # Only skipped scenarios fall back to a default reason text.
        reason = (error or "skipped") if status == "skipped" else error
        print(f"{headline} โ€” {reason} {timing}")

    for note in warnings or []:
        print(f" [WARN] {note}")

    entry = dict(
        id=scenario_num,
        name=name,
        phase=phase,
        status=status,
        attempts=attempts,
        elapsed_s=round(elapsed, 3),
        assertions=assertions or [],
        warnings=warnings or [],
        error=error,
        detail=detail,
    )
    _results.append(entry)
    return entry
262
+
263
+
264
def _session_id(scenario_num: int) -> str:
    """Build a fresh session id of the form ``e2e-<scenario_num>-<8 hex chars>``."""
    random_suffix = uuid4().hex[:8]
    return "e2e-{}-{}".format(scenario_num, random_suffix)
266
+
267
+
268
def _extract_text_from_events(events: list[dict]) -> str:
    """Extract the final text from a list of SSE events.

    Preference order, scanning from the newest event backwards:
      1. ``final_text`` of a ``synthesis`` node event,
      2. ``text`` of an event flagged ``finished``.
    If neither exists, every event's ``text`` (or else ``final_text``) is
    concatenated in arrival order.
    """
    synthesized = next(
        (
            event["final_text"]
            for event in reversed(events)
            if event.get("node") == "synthesis" and event.get("final_text")
        ),
        None,
    )
    if synthesized is not None:
        return synthesized

    finished = next(
        (
            event["text"]
            for event in reversed(events)
            if event.get("finished") and event.get("text")
        ),
        None,
    )
    if finished is not None:
        return finished

    pieces = []
    for event in events:
        piece = event.get("text", "") or event.get("final_text", "")
        if piece:
            pieces.append(piece)
    return "".join(pieces)
278
+
279
+
280
def _contains_legal_keyword(text: str) -> bool:
    """Return True when at least one ``LEGAL_PATTERNS`` regex matches somewhere in *text*."""
    for legal_pattern in LEGAL_PATTERNS:
        if re.search(legal_pattern, text):
            return True
    return False
282
+
283
+
284
+ # ---------------------------------------------------------------------------
285
+ # Agent call helper: _call_agent_with_approval()
286
+ # ---------------------------------------------------------------------------
287
+
288
+
289
# Metadata keys the planner may report on stream events or on the run response.
_PLAN_META_KEYS = ("planned_tools", "task_type", "adapter_mode", "tool_args")


def _merge_meta(meta: dict, source: dict, keys: tuple, *, overwrite: bool = True) -> None:
    """Copy truthy values for *keys* from *source* into *meta*.

    With ``overwrite=False`` a key is only filled while *meta* holds no truthy value for it.
    """
    for key in keys:
        value = source.get(key)
        if value and (overwrite or not meta.get(key)):
            meta[key] = value


async def _resolve_approval(
    thread_id: str, approve: bool, timeout: float, meta: dict
) -> tuple[bool, str, dict, Optional[str]]:
    """POST the approve/reject decision and turn the response into the result tuple."""
    from urllib.parse import urlencode

    # urlencode keeps the query well-formed even if the server-issued thread id
    # contains reserved characters.
    query_string = urlencode(
        {"thread_id": thread_id, "approved": "true" if approve else "false"}
    )
    approve_code, approve_resp = await http_post(
        f"/v2/agent/approve?{query_string}", {}, timeout=timeout
    )
    if approve_code != 200:
        return False, "", meta, f"approve HTTP {approve_code}: {approve_resp}"

    final_text = approve_resp.get("text", "") or approve_resp.get("final_text", "") or ""
    _merge_meta(meta, approve_resp, ("tool_results", "adapter_mode"))

    # A rejection is a successful outcome even when it carries no text.
    if approve_resp.get("status") == "rejected" or final_text:
        return True, final_text, meta, None
    return False, "", meta, f"approve 200 but text ์—†์Œ: {approve_resp}"


async def _call_agent_with_approval(
    query: str,
    session_id: str,
    approve: bool = True,
    timeout: float = 180,
) -> tuple[bool, str, dict, Optional[str]]:
    """Run the agent via SSE streaming, parse up to awaiting_approval, then approve/reject.

    The SSE endpoint (``/v2/agent/stream``) is tried first. If it answers with a
    non-200 status or raises, the call falls back to the REST endpoint
    (``/v2/agent/run``). When the agent reports that it is awaiting approval the
    decision given by *approve* is posted to ``/v2/agent/approve``.

    Args:
        query: User query sent to the agent.
        session_id: Session id; also used as thread id when the server returns none.
        approve: True to approve the pending plan, False to reject it.
        timeout: Timeout in seconds applied to each HTTP call.

    Returns:
        ``(success, text, metadata_dict, error)``.
        metadata_dict keys: planned_tools, task_type, tool_results, adapter_mode, tool_args.
    """
    body = {"query": query, "session_id": session_id, "use_rag": False}
    meta: dict[str, Any] = {
        "planned_tools": [],
        "task_type": None,
        "tool_results": {},
        "adapter_mode": None,
        "tool_args": {},
    }

    # --- SSE streaming attempt ---
    try:
        status_code, events = await http_post_sse("/v2/agent/stream", body, timeout=timeout)
        if status_code != 200:
            raise RuntimeError(f"SSE HTTP {status_code}")

        # Collect planner metadata until the first awaiting_approval / __interrupt__ event.
        awaiting = None
        for ev in events:
            if ev.get("status") == "awaiting_approval" or ev.get("node") == "__interrupt__":
                awaiting = ev
                break
            _merge_meta(meta, ev, _PLAN_META_KEYS)

        if awaiting:
            _merge_meta(meta, awaiting, _PLAN_META_KEYS)
            thread_id = awaiting.get("thread_id") or session_id
            return await _resolve_approval(thread_id, approve, timeout, meta)

        # No approval gate: the stream itself may already carry the final text
        # (auto-approve mode). Fill in whatever metadata is still missing.
        text = _extract_text_from_events(events)
        for ev in events:
            _merge_meta(meta, ev, _PLAN_META_KEYS + ("tool_results",), overwrite=False)

        if text:
            return True, text, meta, None

        for ev in events:
            if ev.get("status") == "error":
                return False, "", meta, ev.get("error", "unknown error")

        return False, "", meta, f"SSE ์ด๋ฒคํŠธ ์ˆ˜์‹ ํ–ˆ์œผ๋‚˜ text/awaiting ์—†์Œ (events={len(events)})"

    except Exception as sse_exc:
        logger.warning("SSE stream failed: %s โ€” falling back to REST", sse_exc)

    # --- REST fallback: /v2/agent/run ---
    try:
        status_code, resp = await http_post("/v2/agent/run", body, timeout=timeout)
        if status_code != 200:
            return False, "", meta, f"REST HTTP {status_code}: {resp}"

        _merge_meta(meta, resp, _PLAN_META_KEYS)

        if resp.get("status") == "awaiting_approval":
            thread_id = resp.get("thread_id") or session_id
            return await _resolve_approval(thread_id, approve, timeout, meta)

        if resp.get("status") == "error":
            return False, "", meta, resp.get("error", "agent run error")

        text = resp.get("text", "") or resp.get("final_text", "")
        _merge_meta(meta, resp, ("tool_results",))
        if text:
            return True, text, meta, None
        return False, "", meta, f"text ์—†์Œ, status={resp.get('status')}"

    except Exception as exc:
        # Verification boundary: any failure becomes a failed result, never a crash.
        return False, "", meta, str(exc)
439
+
440
+
441
+ # ---------------------------------------------------------------------------
442
+ # Phase 1: Infrastructure (hard gate)
443
+ # ---------------------------------------------------------------------------
444
+
445
+
446
async def scenario1_health_profile() -> dict:
    """Scenario 1: Health & Profile (up to 3 attempts with backoff).

    Polls ``/health`` and passes once it answers HTTP 200 with status ``ok`` or
    ``healthy``. Missing ``model`` / ``profile`` fields only produce warnings.
    Returns the entry recorded via ``_record``.
    """
    title = "Health & Profile"
    waits = [5, 10, 20]
    tries = 0
    failure = ""

    for idx in range(3):
        tries += 1
        may_retry = idx < 2
        started = time.monotonic()
        try:
            code, payload = await http_get("/health", timeout=10)
            took = time.monotonic() - started

            checks = []
            if code != 200:
                failure = f"HTTP {code}"
                if may_retry:
                    await asyncio.sleep(waits[idx])
                    continue
                return _record(
                    1,
                    title,
                    1,
                    "failed",
                    took,
                    tries,
                    assertions=["HTTP 200"],
                    error=failure,
                    detail={"body": payload},
                )
            checks.append("HTTP 200: OK")

            reported = payload.get("status", "")
            if reported not in ("ok", "healthy"):
                failure = f"status={reported!r}, expected ok/healthy"
                if may_retry:
                    await asyncio.sleep(waits[idx])
                    continue
                return _record(
                    1,
                    title,
                    1,
                    "failed",
                    took,
                    tries,
                    assertions=checks,
                    error=failure,
                    detail={"body": payload},
                )
            checks.append(f"status={reported}: OK")

            # Optional fields: report their presence, warn about their absence.
            notes = []
            for field in ("model", "profile"):
                if field in payload:
                    checks.append(f"{field}={payload[field]}: OK")
                else:
                    notes.append(f"{field} field not found in /health")

            return _record(
                1,
                title,
                1,
                "passed",
                took,
                tries,
                assertions=checks,
                warnings=notes,
                detail={
                    "status": reported,
                    "model": payload.get("model"),
                    "profile": payload.get("profile"),
                },
            )

        except Exception as exc:
            failure = str(exc)
            if may_retry:
                await asyncio.sleep(waits[idx])
                continue
            return _record(
                1,
                title,
                1,
                "failed",
                time.monotonic() - started,
                tries,
                error=failure,
            )

    return _record(1, title, 1, "failed", 0, tries, error=failure)
540
+
541
+
542
async def scenario2_base_model_generation() -> dict:
    """Scenario 2: Base Model Generation (up to 2 attempts).

    Each attempt tries ``/v1/completions`` first and falls back to the legacy
    ``/v1/generate`` endpoint; the scenario passes on the first non-blank text.
    Returns the entry recorded via ``_record``.
    """
    title = "Base Model Generation"
    prompt = "๋Œ€ํ•œ๋ฏผ๊ตญ์˜ ์ˆ˜๋„๋Š”"
    completions_request = {
        "model": BASE_MODEL,
        "prompt": prompt,
        "max_tokens": 32,
        "temperature": 0.0,
    }
    failure = ""
    tries = 0

    for _ in range(2):
        tries += 1
        started = time.monotonic()
        try:
            code, reply = await http_post("/v1/completions", completions_request, timeout=60)
            took = time.monotonic() - started

            generated = None
            if code == 200:
                choices = reply.get("choices", [])
                if choices:
                    generated = choices[0].get("text")
            if generated is not None and generated.strip():
                return _record(
                    2,
                    title,
                    1,
                    "passed",
                    took,
                    tries,
                    assertions=["HTTP 200", "non-empty text"],
                    detail={"endpoint": "/v1/completions", "text_preview": generated[:100]},
                )

            # Fallback: legacy /v1/generate endpoint.
            legacy_request = {
                "prompt": prompt,
                "max_tokens": 32,
                "temperature": 0.0,
                "use_rag": False,
            }
            legacy_code, legacy_reply = await http_post(
                "/v1/generate", legacy_request, timeout=60
            )
            legacy_took = time.monotonic() - started

            if legacy_code == 200 and legacy_reply.get("text", "").strip():
                return _record(
                    2,
                    title,
                    1,
                    "passed",
                    legacy_took,
                    tries,
                    assertions=["HTTP 200 (fallback)", "non-empty text"],
                    detail={
                        "endpoint": "/v1/generate",
                        "text_preview": legacy_reply["text"][:100],
                    },
                )

            failure = f"/v1/completions HTTP {code}, /v1/generate HTTP {legacy_code}"
        except Exception as exc:
            failure = str(exc)

    return _record(2, title, 1, "failed", time.monotonic() - started, tries, error=failure)
605
+
606
+
607
async def scenario3_adapter_registry() -> dict:
    """Scenario 3: Adapter Registry via ``/v1/models``.

    Passes when the endpoint answers HTTP 200 with a ``data`` list. Whether a
    ``civil`` / ``legal`` adapter id is listed is reported, but a missing adapter
    only yields a warning. Returns the entry recorded via ``_record``.
    """
    title = "Adapter Registry"
    started = time.monotonic()
    try:
        code, listing = await http_get("/v1/models", timeout=10)
        took = time.monotonic() - started

        checks = []
        notes = []

        if code != 200:
            return _record(
                3,
                title,
                1,
                "failed",
                took,
                assertions=["HTTP 200"],
                error=f"HTTP {code}",
                detail={"resp": listing},
            )
        checks.append("HTTP 200: OK")

        models = listing.get("data", [])
        if not isinstance(models, list):
            return _record(
                3,
                title,
                1,
                "failed",
                took,
                assertions=checks,
                error="data array missing or invalid",
                detail={"resp": listing},
            )
        checks.append(f"data array: {len(models)} models")

        model_ids = [model.get("id", "") for model in models]
        civil_found = any("civil" in model_id for model_id in model_ids)
        legal_found = any("legal" in model_id for model_id in model_ids)

        for adapter, found in (("civil", civil_found), ("legal", legal_found)):
            if found:
                checks.append(f"{adapter} adapter detected")
            else:
                notes.append(f"{adapter} adapter not detected in /v1/models (WARN, not FAIL)")

        return _record(
            3,
            title,
            1,
            "passed",
            took,
            assertions=checks,
            warnings=notes,
            detail={"model_ids": model_ids, "civil_found": civil_found, "legal_found": legal_found},
        )

    except Exception as exc:
        return _record(3, title, 1, "failed", time.monotonic() - started, error=str(exc))
670
+
671
+
672
+ # ---------------------------------------------------------------------------
673
+ # Phase 2: Agent Pipeline Core
674
+ # ---------------------------------------------------------------------------
675
+
676
+ # Session shared between Scenario 5 and Scenario 6
677
+ _scenario5_session_id: Optional[str] = None
678
+ _scenario5_passed: bool = False
679
+
680
+
681
async def scenario4_planner_valid_plan() -> dict:
    """Scenario 4: Planner Produces Valid Plan (retry 2x).

    Runs one drafting query through the approve flow and inspects the
    ``planned_tools`` entry of the returned metadata. The scenario passes when
    that entry is non-empty and every tool in it belongs to ``VALID_TOOLS``.
    A failed or raising first attempt is retried once.

    Returns:
        Whatever ``_record`` returns for scenario 4.
    """
    title = "Planner Produces Valid Plan"
    query = "์„œ์šธ์‹œ ๋„๋กœ ํŒŒ์† ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”"
    last_error = ""
    attempts = 0

    while attempts < 2:
        attempts += 1
        may_retry = attempts < 2
        started = time.monotonic()
        try:
            sid = _session_id(4)
            _ok, _text, meta, err = await _call_agent_with_approval(
                query, sid, approve=True, timeout=120
            )
            elapsed = time.monotonic() - started

            planned = meta.get("planned_tools", [])
            if planned:
                _observed_tools.update(planned)
            else:
                # An empty plan is a failure; retry once before recording it.
                last_error = err or "planned_tools ๋น„์–ด์žˆ์Œ"
                if may_retry:
                    continue
                return _record(
                    4,
                    title,
                    2,
                    "failed",
                    elapsed,
                    attempts,
                    assertions=["planned_tools non-empty"],
                    error=last_error,
                    detail={"meta": meta},
                )

            assertions = [f"planned_tools: {planned}"]
            invalid = [tool for tool in planned if tool not in VALID_TOOLS]
            if invalid:
                last_error = f"invalid tools: {invalid}"
                if may_retry:
                    continue
                return _record(
                    4,
                    title,
                    2,
                    "failed",
                    elapsed,
                    attempts,
                    assertions=assertions,
                    error=last_error,
                    detail={"invalid_tools": invalid, "valid": list(VALID_TOOLS)},
                )

            assertions.append("all tools in VALID_TOOLS whitelist")
            return _record(
                4,
                title,
                2,
                "passed",
                elapsed,
                attempts,
                assertions=assertions,
                detail={"planned_tools": planned, "meta": meta},
            )

        except Exception as exc:
            # Remember the failure text; the loop decides whether another attempt runs.
            last_error = str(exc)

    # Only reached when the final attempt raised.
    return _record(4, title, 2, "failed", 0, attempts, error=last_error)
752
+
753
+
754
async def scenario5_civil_lora_draft() -> dict:
    """Scenario 5: Civil LoRA Draft Response (retry 2x).

    Sends a drafting query through the approve flow. The hard criterion is a
    response text of at least 50 characters; the presence of
    ``draft_civil_response`` in ``planned_tools`` and ``task_type`` being
    ``draft_response`` are only reported as warnings. On success the session id
    is stored in the module globals so Scenario 6 can continue the same session.

    Returns:
        Whatever ``_record`` returns for scenario 5.
    """
    global _scenario5_session_id, _scenario5_passed
    title = "Civil LoRA Draft Response"
    query = "์•„ํŒŒํŠธ ์ธต๊ฐ„์†Œ์Œ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”"
    last_error = ""
    attempts = 0

    for attempt_idx in (0, 1):
        attempts += 1
        retry_left = attempt_idx == 0
        started = time.monotonic()
        try:
            sid = _session_id(5)
            ok, text, meta, err = await _call_agent_with_approval(
                query, sid, approve=True, timeout=180
            )
            elapsed = time.monotonic() - started

            planned = meta.get("planned_tools", [])
            if planned:
                _observed_tools.update(planned)

            if not ok:
                last_error = err or "agent call failed"
                if retry_left:
                    continue
                return _record(
                    5,
                    title,
                    2,
                    "failed",
                    elapsed,
                    attempts,
                    assertions=[],
                    error=last_error,
                    detail={"meta": meta},
                )

            has_draft = "draft_civil_response" in planned
            long_enough = len(text) >= 50
            task_type = meta.get("task_type")
            expected_type = task_type == "draft_response"

            assertions = [
                "draft_civil_response in planned_tools"
                if has_draft
                else f"draft_civil_response NOT in planned_tools ({planned})",
                f"text length {len(text)} >= 50"
                if long_enough
                else f"text length {len(text)} < 50 (FAIL)",
                "task_type=draft_response"
                if expected_type
                else f"task_type={task_type} (expected draft_response)",
            ]

            # Core check: text of >= 50 chars is a PASS; planned_tools and
            # task_type are soft checks that only produce warnings.
            if long_enough:
                _scenario5_session_id = sid
                _scenario5_passed = True

            warnings = []
            if not has_draft:
                warnings.append("draft_civil_response not in planned_tools")
            if not expected_type:
                warnings.append(f"task_type={task_type}, expected draft_response")

            preview = {"text_preview": text[:200], "meta": meta}
            if long_enough:
                return _record(
                    5,
                    title,
                    2,
                    "passed",
                    elapsed,
                    attempts,
                    assertions=assertions,
                    warnings=warnings,
                    detail=preview,
                )

            last_error = "text < 50 chars"
            if retry_left:
                continue
            return _record(
                5,
                title,
                2,
                "failed",
                elapsed,
                attempts,
                assertions=assertions,
                warnings=warnings,
                error=last_error,
                detail=preview,
            )

        except Exception as exc:
            last_error = str(exc)

    # Only reached when the final attempt raised.
    return _record(5, title, 2, "failed", 0, attempts, error=last_error)
855
+
856
+
857
async def scenario6_legal_lora_evidence() -> dict:
    """Scenario 6: Legal LoRA Evidence Augmentation (depends on Scenario 5).

    Skipped outright unless Scenario 5 passed. Otherwise a follow-up query is
    sent on the session id stored by Scenario 5, and the scenario passes when
    ``_contains_legal_keyword`` accepts the response text. ``append_evidence``
    missing from ``planned_tools`` is only a warning. One retry is allowed.

    Returns:
        Whatever ``_record`` returns for scenario 6.
    """
    title = "Legal LoRA Evidence Augmentation"
    if not _scenario5_passed:
        return _record(
            6,
            title,
            2,
            "skipped",
            0,
            error="Scenario 5 failed โ€” dependency skip",
        )

    query = "์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น๊ณผ ํŒ๋ก€ ๊ทผ๊ฑฐ๋ฅผ ์ถ”๊ฐ€ํ•ด์ฃผ์„ธ์š”"
    last_error = ""
    attempts = 0

    for attempt_idx in (0, 1):
        attempts += 1
        retry_left = attempt_idx == 0
        started = time.monotonic()
        try:
            ok, text, meta, err = await _call_agent_with_approval(
                query, _scenario5_session_id, approve=True, timeout=180
            )
            elapsed = time.monotonic() - started

            planned = meta.get("planned_tools", [])
            if planned:
                _observed_tools.update(planned)

            if not ok:
                last_error = err or "agent call failed"
                if retry_left:
                    continue
                return _record(
                    6,
                    title,
                    2,
                    "failed",
                    elapsed,
                    attempts,
                    assertions=[],
                    error=last_error,
                    detail={"meta": meta},
                )

            has_evidence = "append_evidence" in planned
            has_legal = _contains_legal_keyword(text)
            # Collected for the report so the record shows which patterns hit.
            matched = [pattern for pattern in LEGAL_PATTERNS if re.search(pattern, text)]

            assertions = [
                "append_evidence in planned_tools"
                if has_evidence
                else f"append_evidence NOT in planned_tools ({planned})",
                f"legal patterns found: {matched[:3]}"
                if has_legal
                else "no legal patterns found (FAIL)",
            ]
            warnings = [] if has_evidence else ["append_evidence not in planned_tools"]

            if has_legal:
                return _record(
                    6,
                    title,
                    2,
                    "passed",
                    elapsed,
                    attempts,
                    assertions=assertions,
                    warnings=warnings,
                    detail={"text_preview": text[:300], "matched_patterns": matched, "meta": meta},
                )

            last_error = "legal pattern not found in response"
            if retry_left:
                continue
            return _record(
                6,
                title,
                2,
                "failed",
                elapsed,
                attempts,
                assertions=assertions,
                warnings=warnings,
                error=last_error,
                detail={"text_preview": text[:300], "meta": meta},
            )

        except Exception as exc:
            last_error = str(exc)

    # Only reached when the final attempt raised.
    return _record(6, title, 2, "failed", 0, attempts, error=last_error)
956
+
957
+
958
async def scenario7_task_type_classification() -> dict:
    """Scenario 7: Task Type Classification (at least 2/3 correct).

    Sends three queries through the approve flow and compares the ``task_type``
    reported in the metadata with a set of acceptable values per query. A query
    whose call raises counts as a miss. The scenario passes with two or more hits.

    Returns:
        Whatever ``_record`` returns for scenario 7.
    """
    title = "Task Type Classification"
    test_cases = [
        ("๋ฏผ์› ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜", {"draft_response"}),
        ("๊ด€๋ จ ํ†ต๊ณ„ ๋ฐ์ดํ„ฐ๋ฅผ ์กฐํšŒํ•ด์ค˜", {"stats_query", "lookup_stats"}),
        ("์ด ๋ฏผ์›์˜ ๊ทผ๊ฑฐ๋ฅผ ๋ณด๊ฐ•ํ•ด์ค˜", {"append_evidence"}),
    ]

    started = time.monotonic()
    correct = 0
    sub_results = []

    for query, expected_types in test_cases:
        try:
            sid = _session_id(7)
            ok, _text, meta, err = await _call_agent_with_approval(
                query, sid, approve=True, timeout=180
            )

            planned = meta.get("planned_tools", [])
            if planned:
                _observed_tools.update(planned)

            actual_type = meta.get("task_type")
            # A missing/empty task_type never counts as a match.
            matched = bool(actual_type) and actual_type in expected_types
            if matched:
                correct += 1

            outcome = {
                "query": query[:30],
                "expected": list(expected_types),
                "actual": actual_type,
                "matched": matched,
                "ok": ok,
                "error": err,
            }
        except Exception as exc:
            outcome = {
                "query": query[:30],
                "expected": list(expected_types),
                "actual": None,
                "matched": False,
                "error": str(exc),
            }
        sub_results.append(outcome)

    elapsed = time.monotonic() - started
    assertions = [f"{correct}/3 task types correct (need >= 2)"]

    if correct < 2:
        return _record(
            7,
            title,
            2,
            "failed",
            elapsed,
            assertions=assertions,
            error=f"only {correct}/3 correct (need >= 2)",
            detail={"sub_results": sub_results},
        )
    return _record(
        7,
        title,
        2,
        "passed",
        elapsed,
        assertions=assertions,
        detail={"sub_results": sub_results, "correct": correct},
    )
1030
+
1031
+
1032
+ # ---------------------------------------------------------------------------
1033
+ # Phase 3: data.go.kr API Tools (soft gate)
1034
+ # ---------------------------------------------------------------------------
1035
+
1036
+ _datago_available: bool = False
1037
+
1038
+
1039
async def _check_datago_connectivity() -> bool:
    """Preflight check that https://www.data.go.kr answers at all.

    Status codes 200, 301, 302 and 403 are all treated as "reachable"; any
    other code, or any exception from ``http_get_raw``, counts as unreachable.
    The outcome is also stored in the module-level ``_datago_available`` flag.

    Returns:
        True when the site is considered reachable, False otherwise.
    """
    global _datago_available
    reachable = False
    try:
        status, _body = await http_get_raw("https://www.data.go.kr", timeout=10)
        reachable = status in (200, 301, 302, 403)
    except Exception:
        # Best-effort probe: any failure simply means "not available".
        reachable = False
    _datago_available = reachable
    return reachable
1049
+
1050
+
1051
async def scenario8_external_api_tools() -> dict:
    """Scenario 8: External API Tool Invocation (4 sub-cases, accept 3/4).

    Skipped when the ``_datago_available`` flag is false. Each sub-case sends
    one query and passes when its expected tool name appears in
    ``planned_tools``; presence in ``tool_results`` is reported but not
    required. A sub-case is retried once only when the call raises; any
    returned response ends that sub-case.

    Returns:
        Whatever ``_record`` returns for scenario 8.
    """
    title = "External API Tool Invocation"
    if not _datago_available:
        return _record(
            8,
            title,
            3,
            "skipped",
            0,
            error="data.go.kr unreachable โ€” Phase 3 skipped",
        )

    sub_cases = [
        ("8a", "์ตœ๊ทผ ๋„๋กœ ๊ด€๋ จ ๋ฏผ์› ์ด์Šˆ๋ฅผ ๋ถ„์„ํ•ด์ค˜", "issue_detector"),
        ("8b", "์„œ์šธ์‹œ ๋ฏผ์› ํ†ต๊ณ„๋ฅผ ์กฐํšŒํ•ด์ค˜", "stats_lookup"),
        ("8c", "๋„๋กœ ๊ด€๋ จ ํ‚ค์›Œ๋“œ ํŠธ๋ Œ๋“œ๋ฅผ ๋ถ„์„ํ•ด์ค˜", "keyword_analyzer"),
        ("8d", "์„œ์šธ์‹œ ๊ฐ•๋‚จ๊ตฌ ๋ฏผ์› ์ธ๊ตฌํ†ต๊ณ„๋ฅผ ์กฐํšŒํ•ด์ค˜", "demographics_lookup"),
    ]

    started = time.monotonic()
    sub_passed = 0
    sub_results = []

    for label, query, expected_tool in sub_cases:
        for attempt_no in (1, 2):  # second pass only happens after an exception
            try:
                sid = _session_id(8)
                _ok, _text, meta, err = await _call_agent_with_approval(
                    query, sid, approve=True, timeout=180
                )

                planned = meta.get("planned_tools", [])
                if planned:
                    _observed_tools.update(planned)

                tool_in_plan = expected_tool in planned
                tool_results = meta.get("tool_results", {})
                tool_in_results = expected_tool in tool_results

                # Being planned is sufficient; execution results are informational.
                passed = tool_in_plan
                if passed:
                    sub_passed += 1

                sub_results.append(
                    {
                        "label": label,
                        "expected_tool": expected_tool,
                        "tool_in_plan": tool_in_plan,
                        "tool_in_results": tool_in_results,
                        "planned_tools": planned,
                        "passed": passed,
                        "attempt": attempt_no,
                        "error": err,
                    }
                )
                break  # a response was received, so no retry
            except Exception as exc:
                if attempt_no == 2:
                    sub_results.append(
                        {
                            "label": label,
                            "expected_tool": expected_tool,
                            "passed": False,
                            "error": str(exc),
                            "attempt": attempt_no,
                        }
                    )

    elapsed = time.monotonic() - started
    assertions = [f"{sub_passed}/4 sub-cases passed (need >= 3)"]

    if sub_passed < 3:
        return _record(
            8,
            title,
            3,
            "failed",
            elapsed,
            assertions=assertions,
            error=f"only {sub_passed}/4 passed (need >= 3)",
            detail={"sub_results": sub_results},
        )
    return _record(
        8,
        title,
        3,
        "passed",
        elapsed,
        assertions=assertions,
        detail={"sub_results": sub_results},
    )
1143
+
1144
+
1145
+ # ---------------------------------------------------------------------------
1146
+ # Phase 4: Adapter Dynamics
1147
+ # ---------------------------------------------------------------------------
1148
+
1149
+
1150
async def scenario9_sequential_adapter_switching() -> dict:
    """Scenario 9: Sequential Adapter Switching (3 iterations, 3 requests each).

    Each iteration obtains one session id and sends a civil query, a legal
    follow-up and a second civil query on it, in that order. A step fails when
    the call reports ``ok`` as false, returns blank text, or raises; a failed
    step is logged and the rest of that iteration is abandoned. The scenario
    passes only when no step failed.

    Exceptions from the agent call are caught per step and recorded as errors,
    matching the other scenarios, so one raising request cannot abort the
    whole run before results are written.

    Returns:
        Whatever ``_record`` returns for scenario 9.
    """
    title = "Sequential Adapter Switching"
    iterations = 3
    # (label used in error messages, query) in the order they are sent.
    steps = (
        ("civil-1", "์ฃผ์ฐจ ์œ„๋ฐ˜ ๊ณผํƒœ๋ฃŒ ์ด์˜์‹ ์ฒญ ๋ฏผ์› ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•ด์ค˜"),
        ("legal", "์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น ๊ทผ๊ฑฐ๋ฅผ ์ถ”๊ฐ€ํ•ด์ค˜"),
        ("civil-2", "์ถ”๊ฐ€ ๋ฏผ์› ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜"),
    )

    t0 = time.monotonic()
    errors: list[str] = []
    total_requests = 0

    for i in range(1, iterations + 1):
        sid = _session_id(9)

        for label, query in steps:
            try:
                ok, text, meta, err = await _call_agent_with_approval(
                    query, sid, approve=True, timeout=180
                )
            except Exception as exc:
                # A raising request is a failed step, not a reason to crash the runner.
                errors.append(f"iter {i} {label}: {exc}")
                break

            total_requests += 1
            if meta.get("planned_tools"):
                _observed_tools.update(meta["planned_tools"])
            if not ok or not text.strip():
                errors.append(f"iter {i} {label}: {err or '๋นˆ ์‘๋‹ต'}")
                # Later steps build on this one, so skip to the next iteration.
                break

    elapsed = time.monotonic() - t0
    assertions = [f"{total_requests} requests completed", f"{len(errors)} errors"]

    if errors:
        return _record(
            9,
            title,
            4,
            "failed",
            elapsed,
            assertions=assertions,
            error="; ".join(errors[:3]),
            detail={"iterations": iterations, "total_requests": total_requests, "errors": errors},
        )
    return _record(
        9,
        title,
        4,
        "passed",
        elapsed,
        assertions=assertions,
        detail={"iterations": iterations, "total_requests": total_requests, "all_passed": True},
    )
1214
+
1215
+
1216
async def scenario10_lora_id_consistency() -> dict:
    """Scenario 10: LoRA ID Consistency (informational, always PASS).

    Fetches ``/v1/models`` twice in a row and compares the sets of model ids.
    A difference, or any exception while fetching, is only reported as a
    warning; the recorded status is "passed" in every case.

    Returns:
        Whatever ``_record`` returns for scenario 10.
    """
    title = "LoRA ID Consistency"
    started = time.monotonic()
    try:
        _status, first = await http_get("/v1/models", timeout=10)
        models_before = [entry.get("id", "") for entry in first.get("data", [])]

        # Checked again; Scenario 9 has already completed by this point.
        _status, second = await http_get("/v1/models", timeout=10)
        models_after = [entry.get("id", "") for entry in second.get("data", [])]

        elapsed = time.monotonic() - started
        stable = set(models_before) == set(models_after)
        warnings = []
        if not stable:
            warnings.append("adapter list changed between checks")

        return _record(
            10,
            title,
            4,
            "passed",
            elapsed,
            assertions=[
                f"before: {len(models_before)} models",
                f"after: {len(models_after)} models",
                f"stable: {stable}",
            ],
            warnings=warnings,
            detail={"models_before": models_before, "models_after": models_after, "stable": stable},
        )
    except Exception as exc:
        # Informational scenario: a failure to verify is downgraded to a warning.
        return _record(
            10,
            title,
            4,
            "passed",
            time.monotonic() - started,
            assertions=["informational check"],
            warnings=[f"could not verify: {exc}"],
        )
1256
+
1257
+
1258
+ # ---------------------------------------------------------------------------
1259
+ # Phase 5: Robustness
1260
+ # ---------------------------------------------------------------------------
1261
+
1262
+
1263
async def scenario11_empty_query() -> dict:
    """Scenario 11: Empty Query Handling (expect 422, NOT 500).

    Posts an empty ``query`` to the REST and SSE agent endpoints. The scenario
    passes when neither status is exactly 500 and at least one of them is 400
    or 422. One retry is allowed; assertion lines accumulate across attempts.

    Returns:
        Whatever ``_record`` returns for scenario 11.
    """
    title = "Empty Query Handling"
    rejected_codes = (400, 422)
    started = time.monotonic()
    assertions = []
    last_error = ""

    for attempt_no in (1, 2):
        try:
            # REST endpoint
            code_rest, _resp_rest = await http_post("/v2/agent/run", {"query": ""}, timeout=10)
            assertions.append(f"/v2/agent/run empty query: HTTP {code_rest}")

            # SSE endpoint
            code_sse, _events_sse = await http_post_sse(
                "/v2/agent/stream", {"query": ""}, timeout=10
            )
            assertions.append(f"/v2/agent/stream empty query: HTTP {code_sse}")

            elapsed = time.monotonic() - started
            codes = {"rest_code": code_rest, "sse_code": code_sse}

            # 422 (Pydantic validation) or 400 (Bad Request) is acceptable; 500 is not.
            saw_500 = code_rest == 500 or code_sse == 500
            any_rejected = code_rest in rejected_codes or code_sse in rejected_codes

            if any_rejected and not saw_500:
                return _record(
                    11,
                    title,
                    5,
                    "passed",
                    elapsed,
                    attempt_no,
                    assertions=assertions,
                    detail=codes,
                )

            if saw_500:
                last_error = f"got 500 (rest={code_rest}, sse={code_sse})"
            else:
                last_error = f"unexpected codes: rest={code_rest}, sse={code_sse}"

            if attempt_no == 1:
                continue
            return _record(
                11,
                title,
                5,
                "failed",
                elapsed,
                attempt_no,
                assertions=assertions,
                error=last_error,
                detail=codes,
            )

        except Exception as exc:
            last_error = str(exc)

    # Only reached when the final attempt raised.
    return _record(11, title, 5, "failed", time.monotonic() - started, 2, error=last_error)
1325
+
1326
+
1327
async def scenario12_reject_flow() -> dict:
    """Scenario 12: Reject Flow Completeness.

    Runs one query with ``approve=False``. The scenario passes as soon as the
    call reports ``ok``; whether ``tool_results`` is empty and whether the
    response arrived in under five seconds are written into the assertion
    lines but do not affect the status. One retry is allowed.

    Returns:
        Whatever ``_record`` returns for scenario 12.
    """
    title = "Reject Flow Completeness"
    last_error = ""

    for attempt_no in (1, 2):
        t0 = time.monotonic()
        try:
            sid = _session_id(12)
            ok, text, meta, err = await _call_agent_with_approval(
                "๋ฏผ์› ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”", sid, approve=False, timeout=30
            )
            elapsed = time.monotonic() - t0

            # After a reject, tool_results is expected to be empty.
            tool_results = meta.get("tool_results", {})

            if not ok:
                last_error = err or "reject flow failed"
                if attempt_no == 1:
                    continue
                return _record(
                    12,
                    title,
                    5,
                    "failed",
                    elapsed,
                    attempt_no,
                    assertions=[],
                    error=last_error,
                    detail={"meta": meta},
                )

            assertions = [
                "reject flow completed",
                f"tool_results NOT empty: {list(tool_results.keys())}"
                if tool_results
                else "tool_results empty after reject",
                f"response < 5s ({elapsed:.1f}s)"
                if elapsed < 5
                else f"response >= 5s ({elapsed:.1f}s)",
            ]
            return _record(
                12,
                title,
                5,
                "passed",
                elapsed,
                attempt_no,
                assertions=assertions,
                detail={"text_preview": text[:100], "tool_results": tool_results, "meta": meta},
            )

        except Exception as exc:
            last_error = str(exc)

    # Only reached when the final attempt raised; t0 is that attempt's start time.
    return _record(12, title, 5, "failed", time.monotonic() - t0, 2, error=last_error)
1390
+
1391
+
1392
async def scenario13_concurrent_isolation() -> dict:
    """Scenario 13: Concurrent Request Isolation (3 simultaneous requests).

    Launches three approve-flow requests at once with ``asyncio.gather``, each
    with its own ``_session_id(13)`` value, and passes only when all three
    report ``ok``. Whether the three session ids are distinct is reported in
    the assertions but does not affect the status.

    Returns:
        Whatever ``_record`` returns for scenario 13.
    """
    title = "Concurrent Request Isolation"
    started = time.monotonic()

    queries = [
        ("์ฃผ์ฐจ ์œ„๋ฐ˜ ๋ฏผ์› ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜", _session_id(13)),
        ("์†Œ์Œ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•ด์ค˜", _session_id(13)),
        ("๋„๋กœ ํŒŒ์† ๋ฏผ์› ๋‹ต๋ณ€์„ ์ž‘์„ฑํ•ด์ค˜", _session_id(13)),
    ]

    async def _probe(query: str, sid: str) -> dict:
        """Run one request and summarise it; exceptions become a failed summary."""
        summary = {"session_id": sid, "ok": False, "text_len": 0, "error": None, "query": query[:20]}
        try:
            ok, text, meta, err = await _call_agent_with_approval(
                query, sid, approve=True, timeout=300
            )
            if meta.get("planned_tools"):
                _observed_tools.update(meta["planned_tools"])
            summary["ok"] = ok
            summary["text_len"] = len(text)
            summary["error"] = err
        except Exception as exc:
            summary["ok"] = False
            summary["text_len"] = 0
            summary["error"] = str(exc)
        return summary

    results = await asyncio.gather(
        *(_probe(query, sid) for query, sid in queries), return_exceptions=True
    )

    elapsed = time.monotonic() - started

    # Normalise the gathered results into plain dicts.
    sub_results = [
        {"ok": False, "error": str(item)} if isinstance(item, Exception) else item
        for item in results
    ]
    valid_count = sum(1 for item in sub_results if item.get("ok"))

    # Cross-contamination check on session ids (each request uses its own id here).
    session_ids = [sid for _query, sid in queries]
    all_unique = len(set(session_ids)) == len(session_ids)

    assertions = [
        f"{valid_count}/3 concurrent requests succeeded",
        f"session_ids unique: {all_unique}",
    ]

    if valid_count != 3:
        return _record(
            13,
            title,
            5,
            "failed",
            elapsed,
            assertions=assertions,
            error=f"only {valid_count}/3 succeeded",
            detail={"sub_results": sub_results},
        )
    return _record(
        13,
        title,
        5,
        "passed",
        elapsed,
        assertions=assertions,
        detail={"sub_results": sub_results},
    )
1470
+
1471
+
1472
+ # ---------------------------------------------------------------------------
1473
+ # Cold Start wait
1474
+ # ---------------------------------------------------------------------------
1475
+
1476
+
1477
async def _wait_cold_start() -> float:
    """Wait for the server to finish its cold start.

    Polls ``/health`` up to 10 times, sleeping 30 seconds between probes, and
    stops as soon as a 200 response carries a ``status`` of "ok" or "healthy".
    If no probe succeeds a warning is printed and the run continues anyway.

    Returns:
        The total number of seconds spent sleeping between probes.
    """
    max_probes = 10
    total_wait = 0.0
    attempt = 0
    while attempt < max_probes:
        try:
            code, body = await http_get("/health", timeout=10)
            if code == 200 and body.get("status") in ("ok", "healthy"):
                print(f" ์„œ๋ฒ„ ์ค€๋น„ ์™„๋ฃŒ (๋Œ€๊ธฐ {total_wait:.0f}s)")
                return total_wait
        except Exception:
            # Deliberately ignored: an unreachable server just means "keep waiting".
            pass
        if attempt < max_probes - 1:
            # No sleep after the last probe; there is nothing left to wait for.
            print(f" ์„œ๋ฒ„ ๋Œ€๊ธฐ ์ค‘... ({attempt + 1}/10, 30s ํ›„ ์žฌ์‹œ๋„)")
            await asyncio.sleep(30)
            total_wait += 30
        attempt += 1

    print(" [WARN] ์„œ๋ฒ„ ์ค€๋น„ ํ™•์ธ ์‹คํŒจ โ€” ๊ณ„์† ์ง„ํ–‰")
    return total_wait
1495
+
1496
+
1497
+ # ---------------------------------------------------------------------------
1498
+ # Main runner
1499
+ # ---------------------------------------------------------------------------
1500
+
1501
+
1502
async def main() -> int:
    """Run every verification phase in order and report the outcome.

    Phase 1 is a hard gate: if any of its scenarios records "failed", the
    results are written and the run aborts with exit code 1. Phases 2-5 always
    run to completion. A summary and the tool coverage are printed, and the
    JSON results are written, before returning.

    Returns:
        0 when no recorded scenario has status "failed", otherwise 1.
    """
    banner = "=" * 60
    section_rule = "-" * 40

    for line in (
        banner,
        "GovOn E2E Tool Calling + AdapterRegistry ๊ฒ€์ฆ",
        banner,
        f" ๋Œ€์ƒ ์„œ๋ฒ„: {BASE_URL}",
        f" ์ธ์ฆ: {'API_KEY ์„ค์ •๋จ' if API_KEY else '๋ฏธ์„ค์ • (๋น„์ธ์ฆ)'}",
        f" HTTP ๋ฐฑ์—”๋“œ: {_HTTP_BACKEND}",
        f" ํƒ€์ž„์•„์›ƒ: {TIMEOUT}s / ์‹œ๋‚˜๋ฆฌ์˜ค",
        f" run_id: {_run_id}",
        "-" * 60,
    ):
        print(line)

    # Cold start wait
    print("[Cold Start] ์„œ๋ฒ„ ์ค€๋น„ ํ™•์ธ ์ค‘...")
    cold_start_wait = await _wait_cold_start()

    # ===== Phase 1: Infrastructure (hard gate) =====
    print("\n[Phase 1] Infrastructure (hard gate)")
    print(section_rule)

    phase1_statuses = []
    for scenario in (
        scenario1_health_profile,
        scenario2_base_model_generation,
        scenario3_adapter_registry,
    ):
        outcome = await scenario()
        phase1_statuses.append(outcome["status"])

    if "failed" in phase1_statuses:
        print("\n" + "!" * 60)
        print("ABORT: Infrastructure not ready โ€” Phase 1 failed")
        print("!" * 60)
        _write_output(cold_start_wait)
        return 1

    # ===== Phase 2: Agent Pipeline Core =====
    print("\n[Phase 2] Agent Pipeline Core")
    print(section_rule)

    # Order matters: scenario 6 reuses the session stored by scenario 5.
    for scenario in (
        scenario4_planner_valid_plan,
        scenario5_civil_lora_draft,
        scenario6_legal_lora_evidence,
        scenario7_task_type_classification,
    ):
        await scenario()

    # ===== Phase 3: data.go.kr API Tools (soft gate) =====
    print("\n[Phase 3] data.go.kr API Tools (soft gate)")
    print(section_rule)

    print(" data.go.kr ์—ฐ๊ฒฐ ํ™•์ธ...")
    if await _check_datago_connectivity():
        print(" data.go.kr ์—ฐ๊ฒฐ ๊ฐ€๋Šฅ")
    else:
        print(" data.go.kr ์—ฐ๊ฒฐ ๋ถˆ๊ฐ€ โ€” Phase 3 ์Šคํ‚ต")

    # Called unconditionally; the scenario records itself as skipped when unreachable.
    await scenario8_external_api_tools()

    # ===== Phase 4: Adapter Dynamics =====
    print("\n[Phase 4] Adapter Dynamics")
    print(section_rule)

    await scenario9_sequential_adapter_switching()
    await scenario10_lora_id_consistency()

    # ===== Phase 5: Robustness =====
    print("\n[Phase 5] Robustness")
    print(section_rule)

    for scenario in (
        scenario11_empty_query,
        scenario12_reject_flow,
        scenario13_concurrent_isolation,
    ):
        await scenario()

    # ===== Summary =====
    print("\n" + banner)
    statuses = [entry["status"] for entry in _results]
    passed = statuses.count("passed")
    failed = statuses.count("failed")
    skipped = statuses.count("skipped")
    total = len(_results)

    print(f"๊ฒฐ๊ณผ: {passed}/{total} ํ†ต๊ณผ, {failed} ์‹คํŒจ, {skipped} ์Šคํ‚ต")

    tool_ratio = len(_observed_tools) / len(VALID_TOOLS) if VALID_TOOLS else 0
    print(f"๋„๊ตฌ ์ปค๋ฒ„๋ฆฌ์ง€: {len(_observed_tools)}/{len(VALID_TOOLS)} ({tool_ratio:.0%})")
    if _observed_tools:
        print(f" ๊ด€์ธก๋œ ๋„๊ตฌ: {sorted(_observed_tools)}")

    _write_output(cold_start_wait)

    return 1 if failed else 0
1604
+
1605
+
1606
def _write_output(cold_start_wait: float) -> None:
    """Write the JSON results file.

    Serialises run metadata, per-status counts, tool coverage and the full
    ``_results`` list to ``RESULTS_PATH`` as UTF-8 JSON, then prints the path.

    Args:
        cold_start_wait: Seconds spent waiting for the server before the run;
            stored under ``meta.cold_start_wait_seconds``.
    """
    from datetime import datetime, timezone

    # Tally only the three statuses that are reported in the summary.
    tally = {"passed": 0, "failed": 0, "skipped": 0}
    for entry in _results:
        status = entry["status"]
        if status in tally:
            tally[status] += 1

    # Guard against an empty whitelist to avoid dividing by zero.
    tool_ratio = len(_observed_tools) / len(VALID_TOOLS) if VALID_TOOLS else 0

    meta = {
        "run_id": _run_id,
        "timestamp_utc": datetime.now(timezone.utc).isoformat(),
        "target_url": BASE_URL,
        "cold_start_wait_seconds": cold_start_wait,
    }
    summary = {
        "total": len(_results),
        "passed": tally["passed"],
        "failed": tally["failed"],
        "skipped": tally["skipped"],
        "tool_coverage": {
            "observed": sorted(_observed_tools),
            "ratio": round(tool_ratio, 2),
        },
    }
    output = {
        "meta": meta,
        "summary": summary,
        "scenarios": _results,
        "server_url": BASE_URL,
        "http_backend": _HTTP_BACKEND,
    }

    with open(RESULTS_PATH, "w", encoding="utf-8") as handle:
        json.dump(output, handle, ensure_ascii=False, indent=2)
    print(f"\n๊ฒฐ๊ณผ ์ €์žฅ: {RESULTS_PATH}")
1641
+
1642
+
1643
if __name__ == "__main__":
    # Run the async runner to completion and use its result as the process exit status.
    sys.exit(asyncio.run(main()))
scripts/verify_lora_serving.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """GovOn Legal LoRA ์–ด๋Œ‘ํ„ฐ ์„œ๋น™ ํ†ตํ•ฉ ๊ฒ€์ฆ ์Šคํฌ๋ฆฝํŠธ.
3
+
4
+ HuggingFace Space์— ๋ฐฐํฌ๋œ govon-runtime ์„œ๋ฒ„์— ๋Œ€ํ•ด
5
+ legal/civil adapter Multi-LoRA ์„œ๋น™ ๋™์ž‘์„ ๊ฒ€์ฆํ•œ๋‹ค.
6
+
7
+ ์‚ฌ์šฉ๋ฒ•:
8
+ GOVON_RUNTIME_URL=https://<space-url>.hf.space python3 scripts/verify_lora_serving.py
9
+ GOVON_RUNTIME_URL=https://<space-url>.hf.space API_KEY=<key> python3 scripts/verify_lora_serving.py
10
+
11
+ ์—”๋“œํฌ์ธํŠธ ์ฐธ๊ณ  (src/inference/api_server.py):
12
+ GET /health โ€” ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ (status: "healthy")
13
+ POST /v1/completions โ€” OpenAI-compatible (vLLM ์ง์ ‘ ์ œ๊ณต)
14
+ POST /v1/generate โ€” GovOn ๋ ˆ๊ฑฐ์‹œ ์ƒ์„ฑ ์—”๋“œํฌ์ธํŠธ
15
+ POST /v2/agent/run โ€” LangGraph agent (REST, interrupt๊นŒ์ง€ ์‹คํ–‰)
16
+ POST /v2/agent/stream โ€” LangGraph agent (SSE ์ŠคํŠธ๋ฆฌ๋ฐ)
17
+ GET /v1/models โ€” OpenAI-compatible ๋ชจ๋ธ ๋ชฉ๋ก (vLLM ์ง์ ‘ ์ œ๊ณต)
18
+
19
+ AgentRunRequest ํ•„๋“œ:
20
+ query: str โ€” ์‚ฌ์šฉ์ž ์ž…๋ ฅ (ํ•„์ˆ˜)
21
+ session_id: str โ€” ์„ธ์…˜ ์‹๋ณ„์ž (์„ ํƒ)
22
+ stream: bool โ€” ์ŠคํŠธ๋ฆฌ๋ฐ ์—ฌ๋ถ€ (๊ธฐ๋ณธ๊ฐ’ False)
23
+ force_tools: list โ€” ๊ฐ•์ œ ์‹คํ–‰ ๋„๊ตฌ ๋ชฉ๋ก (์„ ํƒ)
24
+ max_tokens: int โ€” ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜ (๊ธฐ๋ณธ๊ฐ’ 512)
25
+ temperature: float โ€” ์˜จ๋„ (๊ธฐ๋ณธ๊ฐ’ 0.7)
26
+ use_rag: bool โ€” RAG ์‚ฌ์šฉ ์—ฌ๋ถ€ (๊ธฐ๋ณธ๊ฐ’ True)
27
+ """
28
+
29
+ # stdlib
30
+ import asyncio
31
+ import json
32
+ import logging
33
+ import os
34
+ import re
35
+ import sys
36
+ import time
37
+ from typing import Any, Optional
38
+ from uuid import uuid4
39
+
40
# Target server root; a trailing "/" is stripped so paths can be appended directly.
BASE_URL = os.environ.get("GOVON_RUNTIME_URL", "http://localhost:7860").rstrip("/")
# Optional API key; when set it is sent as the X-API-Key header.
API_KEY = os.environ.get("API_KEY")
# Timeout in seconds. It is passed as the timeout of every individual HTTP
# request (httpx client / urlopen), so a scenario that issues several requests
# can take longer than this in total.
TIMEOUT = 300
# Model name sent in the /v1/completions request body.
BASE_MODEL = "LGAI-EXAONE/EXAONE-4.0-32B-AWQ"
# File (relative to the working directory) that main() writes the JSON report to.
RESULTS_PATH = "verify_results.json"

logger = logging.getLogger(__name__)

# Statute-related regex patterns used by the Scenario 4 check. Multi-character
# patterns only; single characters are deliberately excluded.
LEGAL_PATTERNS = [
    r"์ œ\s*\d+\s*์กฐ",
    r"์ œ\s*\d+\s*ํ•ญ",
    r"๋ฒ•๋ฅ ",
    r"์‹œํ–‰๋ น",
    r"์กฐ๋ก€",
    r"ํŒ๋ก€",
    r"๋Œ€๋ฒ•์›",
]

# Accumulates one entry per recorded scenario (appended by _record).
_results: list[dict] = []
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # HTTP ํด๋ผ์ด์–ธํŠธ ๋ ˆ์ด์–ด (httpx ์šฐ์„ , urllib fallback)
64
+ # ---------------------------------------------------------------------------
65
+
66
+
67
def _build_headers() -> dict:
    """Return the JSON request headers, adding X-API-Key when API_KEY is set."""
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    if API_KEY:
        headers["X-API-Key"] = API_KEY
    return headers


def _decode_json_body(raw: str) -> dict:
    """Decode a response body as JSON.

    An empty body yields ``{}``; a body that is not valid JSON yields
    ``{"_raw": <first 200 characters>}`` instead of raising, mirroring what the
    httpx branch does for undecodable responses.
    """
    if not raw.strip():
        return {}
    try:
        return json.loads(raw)
    except ValueError:
        return {"_raw": raw[:200]}


def _parse_sse_line(line: str) -> Optional[dict]:
    """Parse one SSE line into an event dict.

    Returns ``None`` for lines that are not ``data:`` lines or carry an empty
    payload. A payload that is not a JSON object (invalid JSON, or a JSON
    scalar/array) is wrapped as ``{"_raw": payload}`` so that callers can
    always use ``event.get(...)``.
    """
    line = line.strip()
    if not line.startswith("data:"):
        return None
    payload = line[len("data:") :].strip()
    if not payload:
        return None
    try:
        event = json.loads(payload)
    except json.JSONDecodeError:
        return {"_raw": payload}
    return event if isinstance(event, dict) else {"_raw": payload}


try:
    import httpx

    _HTTP_BACKEND = "httpx"

    async def http_get(path: str) -> tuple[int, dict]:
        """GET ``BASE_URL + path`` and return ``(status_code, decoded_body)``."""
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.get(url, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except ValueError:
                # Body is not JSON: keep a short preview for diagnostics.
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST ``body`` as JSON and return ``(status_code, decoded_body)``."""
        url = BASE_URL + path
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            resp = await client.post(url, json=body, headers=_build_headers())
            try:
                return resp.status_code, resp.json()
            except ValueError:
                return resp.status_code, {"_raw": resp.text[:200]}

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """POST ``body`` and collect the SSE ``data:`` events of the response.

        Returns ``(status_code, events)`` where ``events`` holds one dict per
        non-empty ``data:`` line, in arrival order.
        """
        url = BASE_URL + path
        headers = _build_headers()
        headers["Accept"] = "text/event-stream"
        events: list[dict] = []
        status_code = 0
        async with httpx.AsyncClient(timeout=TIMEOUT) as client:
            async with client.stream("POST", url, json=body, headers=headers) as resp:
                status_code = resp.status_code
                async for line in resp.aiter_lines():
                    event = _parse_sse_line(line)
                    if event is not None:
                        events.append(event)
        return status_code, events

except ImportError:
    import urllib.error
    import urllib.request

    _HTTP_BACKEND = "urllib"

    def _urllib_send(req: "urllib.request.Request") -> tuple[int, dict]:
        """Send ``req`` and return ``(status_code, decoded_body)``.

        HTTP error responses are returned (with their decoded body) rather
        than raised, so callers see the same shape as with the httpx backend.
        """
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                raw = r.read().decode("utf-8", errors="replace")
                return r.status, _decode_json_body(raw)
        except urllib.error.HTTPError as e:
            # HTTPError is file-like; its body usually carries the error detail.
            raw = e.read().decode("utf-8", errors="replace")
            return e.code, _decode_json_body(raw)

    async def http_get(path: str) -> tuple[int, dict]:
        """GET ``BASE_URL + path`` (blocking urllib call inside a coroutine)."""
        req = urllib.request.Request(BASE_URL + path, headers=_build_headers(), method="GET")
        return _urllib_send(req)

    async def http_post(path: str, body: dict) -> tuple[int, dict]:
        """POST ``body`` as JSON (blocking urllib call inside a coroutine)."""
        data = json.dumps(body).encode()
        req = urllib.request.Request(
            BASE_URL + path, data=data, headers=_build_headers(), method="POST"
        )
        return _urllib_send(req)

    async def http_post_sse(path: str, body: dict) -> tuple[int, list[dict]]:
        """urllib fallback: read the SSE stream synchronously, line by line."""
        data = json.dumps(body).encode()
        headers = _build_headers()
        headers["Accept"] = "text/event-stream"
        req = urllib.request.Request(BASE_URL + path, data=data, headers=headers, method="POST")
        events: list[dict] = []
        status_code = 0
        try:
            with urllib.request.urlopen(req, timeout=TIMEOUT) as r:
                status_code = r.status
                for raw_line in r:
                    event = _parse_sse_line(raw_line.decode("utf-8", errors="replace"))
                    if event is not None:
                        events.append(event)
        except urllib.error.HTTPError as e:
            status_code = e.code
        return status_code, events
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # ๊ฒฐ๊ณผ ๊ธฐ๋ก / ์ถœ๋ ฅ ํ—ฌํผ
180
+ # ---------------------------------------------------------------------------
181
+
182
+
183
def _record(
    scenario_num: int,
    name: str,
    passed: bool,
    elapsed: float,
    error: Optional[str] = None,
    detail: Optional[Any] = None,
) -> dict:
    """Print a one-line PASS/FAIL summary and store the scenario outcome.

    The outcome dict is appended to the module-level ``_results`` list and
    also returned. ``elapsed`` is printed with two decimals and stored rounded
    to three.
    """
    timing = f"({elapsed:.2f}s)"
    if not passed:
        print(f"[FAIL] Scenario {scenario_num}: {name} โ€” {error} {timing}")
    else:
        print(f"[PASS] Scenario {scenario_num}: {name} {timing}")

    outcome = dict(
        scenario=scenario_num,
        name=name,
        passed=passed,
        elapsed_s=round(elapsed, 3),
        error=error,
        detail=detail,
    )
    _results.append(outcome)
    return outcome
208
+
209
+
210
def _extract_text_from_events(events: list[dict]) -> str:
    """Pick the final response text out of a list of parsed SSE events.

    Lookup order:
      1. the last event with ``node == "synthesis"`` and a non-empty
         ``final_text``;
      2. the last event with a truthy ``finished`` flag and a non-empty
         ``text``;
      3. otherwise, every event's ``text`` (or, failing that, ``final_text``)
         concatenated in arrival order.
    """
    newest_first = list(reversed(events))

    synthesis_text = next(
        (
            ev["final_text"]
            for ev in newest_first
            if ev.get("node") == "synthesis" and ev.get("final_text")
        ),
        None,
    )
    if synthesis_text is not None:
        return synthesis_text

    finished_text = next(
        (ev["text"] for ev in newest_first if ev.get("finished") and ev.get("text")),
        None,
    )
    if finished_text is not None:
        return finished_text

    # Fallback: stitch together whatever text fragments the events carry.
    pieces = []
    for ev in events:
        piece = ev.get("text", "") or ev.get("final_text", "")
        if piece:
            pieces.append(piece)
    return "".join(pieces)
228
+
229
+
230
def _contains_legal_keyword(text: str) -> bool:
    """Return True when any regex in LEGAL_PATTERNS matches somewhere in *text*."""
    for pattern in LEGAL_PATTERNS:
        if re.search(pattern, text):
            return True
    return False
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # ์‹œ๋‚˜๋ฆฌ์˜ค ๊ตฌํ˜„
236
+ # ---------------------------------------------------------------------------
237
+
238
+
239
async def scenario1_health_check() -> dict:
    """Scenario 1: GET /health and require HTTP 200 with status "ok" or "healthy".

    Returns the entry produced by ``_record``; any exception is recorded as a
    failure rather than propagated.
    """
    label = "Health Check"
    started = time.monotonic()
    try:
        status_code, body = await http_get("/health")
        took = time.monotonic() - started

        if status_code == 200:
            srv_status = body.get("status", "")
            if srv_status in ("ok", "healthy"):
                return _record(1, label, True, took, detail={"status": srv_status})
            return _record(
                1,
                label,
                False,
                took,
                f"status ํ•„๋“œ๊ฐ€ ok/healthy๊ฐ€ ์•„๋‹˜: {srv_status!r}",
                {"body": body},
            )

        return _record(1, label, False, took, f"HTTP {status_code}", {"body": body})
    except Exception as exc:
        return _record(1, label, False, time.monotonic() - started, str(exc))
264
+
265
+
266
async def scenario2_base_model_generation() -> dict:
    """Scenario 2: base-model generation.

    Posts a short prompt to /v1/completions first. On HTTP 200 the scenario
    passes only if ``choices[0].text`` is present. On any other status it
    retries against the legacy /v1/generate endpoint, which passes on HTTP 200
    with a non-empty ``text``. Exceptions are recorded as failures.
    """
    label = "Base Model Generation"
    started = time.monotonic()
    body_completions = {
        "model": BASE_MODEL,
        "prompt": "๋Œ€ํ•œ๋ฏผ๊ตญ ์ˆ˜๋„๋Š” ์–ด๋””์ž…๋‹ˆ๊นŒ?",
        "max_tokens": 64,
        "temperature": 0.0,
    }
    try:
        status_code, resp = await http_post("/v1/completions", body_completions)
        took = time.monotonic() - started

        if status_code == 200:
            choices = resp.get("choices", [])
            if not choices or choices[0].get("text") is None:
                return _record(2, label, False, took, "choices[0].text ์—†์Œ", {"resp": resp})
            preview = choices[0]["text"][:100]
            return _record(
                2,
                label,
                True,
                took,
                detail={"endpoint": "/v1/completions", "text_preview": preview},
            )

        # /v1/completions did not answer 200: try the legacy endpoint.
        body_legacy = {
            "prompt": "๋Œ€ํ•œ๋ฏผ๊ตญ ์ˆ˜๋„๋Š” ์–ด๋””์ž…๋‹ˆ๊นŒ?",
            "max_tokens": 64,
            "temperature": 0.0,
            "use_rag": False,
        }
        status_code2, resp2 = await http_post("/v1/generate", body_legacy)
        took_total = time.monotonic() - started

        if status_code2 != 200 or not resp2.get("text"):
            return _record(
                2,
                label,
                False,
                took_total,
                f"/v1/completions HTTP {status_code}, /v1/generate HTTP {status_code2}",
                {"completions_resp": resp, "generate_resp": resp2},
            )
        return _record(
            2,
            label,
            True,
            took_total,
            detail={"endpoint": "/v1/generate (fallback)", "text_preview": resp2["text"][:100]},
        )
    except Exception as exc:
        return _record(2, label, False, time.monotonic() - started, str(exc))
328
+
329
+
330
async def _approve_pending(thread_id: Any) -> tuple[bool, str, Optional[str]]:
    """Auto-approve an interrupted agent run and return ``(ok, text, error)``."""
    from urllib.parse import urlencode

    # Encode the query string: thread_id comes from the server response and
    # must not be spliced into the URL verbatim.
    query = urlencode({"thread_id": thread_id, "approved": "true"})
    try:
        approve_code, approve_resp = await http_post(f"/v2/agent/approve?{query}", {})
        if approve_code == 200:
            final_text = approve_resp.get("text", "") or approve_resp.get("final_text", "")
            if final_text:
                return True, final_text, None
            return False, "", f"approve 200 but text ์—†์Œ: {approve_resp}"
        return False, "", f"approve HTTP {approve_code}: {approve_resp}"
    except Exception as approve_exc:
        return False, "", f"approve ํ˜ธ์ถœ ์‹คํŒจ: {approve_exc}"


async def _call_agent(
    message: str,
    session_id: str,
    use_stream: bool = True,
) -> tuple[bool, str, Optional[str]]:
    """Call the agent endpoint and return ``(success, response_text, error)``.

    When ``use_stream`` is true, /v2/agent/stream (SSE) is tried first:
      * text found in the events -> success;
      * an event with ``status == "error"`` -> failure with that error;
      * an ``awaiting_approval`` / ``__interrupt__`` event -> the run is
        auto-approved via /v2/agent/approve and that response's text is used.
    If the stream call raises, returns a non-200 status, or yields no events,
    /v2/agent/run (REST) is tried instead. The request body always carries
    ``use_rag=False``.
    """
    body = {"query": message, "session_id": session_id, "use_rag": False}

    if use_stream:
        try:
            status_code, events = await http_post_sse("/v2/agent/stream", body)
            if status_code == 200 and events:
                text = _extract_text_from_events(events)
                if text:
                    return True, text, None
                # Events arrived but carried no text: look for an error event.
                for ev in events:
                    if ev.get("status") == "error":
                        return False, "", ev.get("error", "unknown error")
                # An approval-pending event: approve and collect the final text.
                awaiting = next(
                    (
                        ev
                        for ev in events
                        if ev.get("status") == "awaiting_approval"
                        or ev.get("node") == "__interrupt__"
                    ),
                    None,
                )
                if awaiting:
                    return await _approve_pending(awaiting.get("thread_id") or session_id)
                return False, "", f"SSE ์ด๋ฒคํŠธ ์ˆ˜์‹ ํ–ˆ์œผ๋‚˜ text ์—†์Œ (events={len(events)})"
        except Exception as exc:
            # Best effort: log and fall through to the REST endpoint below.
            logger.warning("Stream error: %s", exc)

    try:
        status_code, resp = await http_post("/v2/agent/run", body)
        if status_code == 200:
            text = resp.get("text", "") or resp.get("final_text", "")
            if resp.get("status") == "error":
                return False, text, resp.get("error", "agent run error")
            if text:
                return True, text, None
            # On the REST path a pending approval counts as a failure,
            # because no text was generated.
            if resp.get("status") == "awaiting_approval":
                return (
                    False,
                    "",
                    f"awaiting_approval: ํ…์ŠคํŠธ ๋ฏธ์ƒ์„ฑ (thread_id={resp.get('thread_id')})",
                )
            return False, "", f"text ์—†์Œ, status={resp.get('status')}"
        return False, "", f"HTTP {status_code}: {resp}"
    except Exception as exc:
        return False, "", str(exc)
405
+
406
+
407
# Session ID generated once at import time. In this file it is passed only by
# scenario3_civil_lora; scenario4_legal_lora creates its own UUID instead.
_RUN_SESSION_ID = str(uuid4())
409
+
410
+
411
async def scenario3_civil_lora() -> dict:
    """Scenario 3: request a civil-complaint reply draft through the agent.

    Passes when ``_call_agent`` reports success and the returned text is not
    blank. Uses the module-level ``_RUN_SESSION_ID`` as session.
    """
    label = "Civil LoRA (draft_civil_response)"
    started = time.monotonic()
    try:
        ok, text, err = await _call_agent(
            message="์ฃผ์ฐจ ์œ„๋ฐ˜ ๊ณผํƒœ๋ฃŒ ์ด์˜์‹ ์ฒญ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=_RUN_SESSION_ID,
        )
        took = time.monotonic() - started

        if ok and text.strip():
            return _record(3, label, True, took, detail={"text_preview": text[:200]})
        if ok:
            # The call succeeded but produced only whitespace.
            return _record(3, label, False, took, "์‘๋‹ต ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Œ")
        preview = text[:200] if text else ""
        return _record(3, label, False, took, err, {"text_preview": preview})
    except Exception as exc:
        return _record(3, label, False, time.monotonic() - started, str(exc))
444
+
445
+
446
async def scenario4_legal_lora() -> dict:
    """Scenario 4: ask for legal grounds to be added to a prior draft.

    In a fresh session, a civil reply draft is requested first and then, in
    the same session, a follow-up asks for statutes and precedents. The
    scenario passes when the follow-up text matches at least one entry of
    ``LEGAL_PATTERNS``.
    """
    label = "Legal LoRA (append_evidence)"
    started = time.monotonic()
    session_id = str(uuid4())
    try:
        # The follow-up refers to "the answer above", so a draft must exist first.
        ok_civil, _, err_civil = await _call_agent(
            message="๊ฑด์ถ• ํ—ˆ๊ฐ€ ์‹ ์ฒญ ๋ฏผ์›์— ๋Œ€ํ•œ ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=session_id,
        )
        if not ok_civil:
            return _record(
                4,
                label,
                False,
                time.monotonic() - started,
                f"civil ์„ ํ–‰ ์š”์ฒญ ์‹คํŒจ: {err_civil}",
            )

        ok, text, err = await _call_agent(
            message="์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น๊ณผ ํŒ๋ก€ ๊ทผ๊ฑฐ๋ฅผ ๋ณด๊ฐ•ํ•ด์ค˜",
            session_id=session_id,
        )
        took = time.monotonic() - started
        if not ok:
            preview = text[:200] if text else ""
            return _record(4, label, False, took, err, {"text_preview": preview})
        if not text.strip():
            return _record(4, label, False, took, "์‘๋‹ต ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Œ")

        has_legal = _contains_legal_keyword(text)
        matched = []
        for pattern in LEGAL_PATTERNS:
            if re.search(pattern, text):
                matched.append(pattern)
        detail = {
            "has_legal_keyword": has_legal,
            "matched_patterns": matched,
            "text_preview": text[:300],
        }
        if has_legal:
            return _record(4, label, True, took, detail=detail)
        return _record(
            4,
            label,
            False,
            took,
            f"๋ฒ•๋ น ํŒจํ„ด ๋ฏธ๋ฐœ๊ฒฌ ({LEGAL_PATTERNS[:3]}...)",
            detail,
        )
    except Exception as exc:
        return _record(4, label, False, time.monotonic() - started, str(exc))
508
+
509
+
510
async def scenario5_sequential_multi_lora_switching() -> dict:
    """Scenario 5: alternate civil and legal requests three times.

    Each iteration uses a fresh UUID session, sends a civil draft request and,
    if that succeeds with non-blank text, a legal follow-up in the same
    session. Every failing step is collected; the scenario passes only when
    no step failed.
    """
    label = "Sequential Multi-LoRA Switching"
    started = time.monotonic()
    errors: list[str] = []
    iterations = 3

    i = 0
    while i < iterations:
        i += 1
        session_id = str(uuid4())

        ok, text, err = await _call_agent(
            message="ํ–‰์ •์ฒ˜๋ถ„ ์ด์˜์‹ ์ฒญ ๋ฏผ์› ๋‹ต๋ณ€ ์ดˆ์•ˆ์„ ์ž‘์„ฑํ•ด์ค˜",
            session_id=session_id,
        )
        if ok and text.strip():
            # Civil step succeeded: send the legal follow-up in the same session.
            ok2, text2, err2 = await _call_agent(
                message="์œ„ ๋‹ต๋ณ€์— ๊ด€๋ จ ๋ฒ•๋ น ๊ทผ๊ฑฐ๋ฅผ ์ถ”๊ฐ€ํ•ด์ค˜",
                session_id=session_id,
            )
            if not (ok2 and text2.strip()):
                errors.append(f"iter {i} legal: {err2 or '๋นˆ ์‘๋‹ต'}")
        else:
            errors.append(f"iter {i} civil: {err or '๋นˆ ์‘๋‹ต'}")

    took = time.monotonic() - started
    if not errors:
        return _record(
            5,
            label,
            True,
            took,
            detail={"iterations": iterations, "all_passed": True},
        )
    return _record(
        5,
        label,
        False,
        took,
        "; ".join(errors),
        {"iterations": iterations, "errors": errors},
    )
557
+
558
+
559
async def scenario6_lora_id_consistency() -> dict:
    """Scenario 6: informational check of adapter visibility.

    Requires /health to answer HTTP 200 and copies some of its fields into the
    detail. Then looks for model ids containing "civil" / "legal" in
    /v1/models. Missing adapters only produce a warning (logged and stored in
    the detail); they do not fail the scenario.
    """
    label = "LoRA ID Consistency Check"
    started = time.monotonic()
    try:
        status_code, health = await http_get("/health")
        took = time.monotonic() - started

        if status_code != 200:
            return _record(6, label, False, took, f"/health HTTP {status_code}")

        detail: dict = {
            "health_status": health.get("status"),
            "agents_loaded": health.get("agents_loaded", []),
            "model": health.get("model", ""),
            "feature_flags": health.get("feature_flags", {}),
        }

        civil_found = False
        legal_found = False

        try:
            models_status, models_resp = await http_get("/v1/models")
            if models_status == 200:
                model_ids = [m.get("id", "") for m in models_resp.get("data", [])]
                detail["v1_models"] = model_ids
                civil_found = any("civil" in mid for mid in model_ids)
                legal_found = any("legal" in mid for mid in model_ids)
                detail["civil_adapter_in_models"] = civil_found
                detail["legal_adapter_in_models"] = legal_found
        except Exception as exc:
            logger.warning("Failed to fetch /v1/models: %s", exc)
            detail["v1_models"] = "unavailable"

        # Informational only: absence of an adapter is reported, not failed.
        missing = [
            adapter
            for adapter, found in (("civil", civil_found), ("legal", legal_found))
            if not found
        ]
        if missing:
            detail["warning"] = f"์–ด๋Œ‘ํ„ฐ ๋ฏธ๊ฐ์ง€ (vLLM ๋ฒ„์ „์— ๋”ฐ๋ผ ์ •์ƒ): {', '.join(missing)}"
            logger.warning(detail["warning"])

        return _record(6, label, True, time.monotonic() - started, detail=detail)
    except Exception as exc:
        return _record(6, label, False, time.monotonic() - started, str(exc))
613
+
614
+
615
+ # ---------------------------------------------------------------------------
616
+ # ๋ฉ”์ธ ๋Ÿฌ๋„ˆ
617
+ # ---------------------------------------------------------------------------
618
+
619
+
620
async def main() -> int:
    """Run all scenarios in order, print a summary and write the JSON report.

    Returns 0 when every recorded scenario passed, otherwise 1. The report is
    written to ``RESULTS_PATH``.
    """
    divider = "-" * 60
    print("GovOn Legal LoRA ์„œ๋น™ ํ†ตํ•ฉ ๊ฒ€์ฆ")
    print(f" ๋Œ€์ƒ ์„œ๋ฒ„: {BASE_URL}")
    print(f" ์ธ์ฆ: {'API_KEY ์„ค์ •๋จ' if API_KEY else '๋ฏธ์„ค์ • (๋น„์ธ์ฆ)'}")
    print(f" HTTP ๋ฐฑ์—”๋“œ: {_HTTP_BACKEND}")
    print(f" ํƒ€์ž„์•„์›ƒ: {TIMEOUT}s / ์‹œ๋‚˜๋ฆฌ์˜ค")
    print(divider)

    # Scenarios run strictly one after another; each records its own result.
    for run_scenario in (
        scenario1_health_check,
        scenario2_base_model_generation,
        scenario3_civil_lora,
        scenario4_legal_lora,
        scenario5_sequential_multi_lora_switching,
        scenario6_lora_id_consistency,
    ):
        await run_scenario()

    print(divider)
    passed = len([r for r in _results if r["passed"]])
    failed = len(_results) - passed
    print(f"๊ฒฐ๊ณผ: {passed}/{len(_results)} ํ†ต๊ณผ, {failed} ์‹คํŒจ")

    report = {
        "server_url": BASE_URL,
        "http_backend": _HTTP_BACKEND,
        "total": len(_results),
        "passed": passed,
        "failed": failed,
        "scenarios": _results,
    }
    with open(RESULTS_PATH, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"๊ฒฐ๊ณผ ์ €์žฅ: {RESULTS_PATH}")

    return 1 if failed else 0
659
+
660
+
661
# Script entry point: run the async main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
scripts/verify_results.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "server_url": "https://umyunsang-govon-runtime.hf.space",
3
+ "http_backend": "httpx",
4
+ "total": 6,
5
+ "passed": 5,
6
+ "failed": 1,
7
+ "scenarios": [
8
+ {
9
+ "scenario": 1,
10
+ "name": "Health Check",
11
+ "passed": true,
12
+ "elapsed_s": 1.092,
13
+ "error": null,
14
+ "detail": {
15
+ "status": "healthy"
16
+ }
17
+ },
18
+ {
19
+ "scenario": 2,
20
+ "name": "Base Model Generation",
21
+ "passed": true,
22
+ "elapsed_s": 7.963,
23
+ "error": null,
24
+ "detail": {
25
+ "endpoint": "/v1/generate (fallback)",
26
+ "text_preview": "We are dealing with a very basic factual question about the capital of South Korea. \n The user's que"
27
+ }
28
+ },
29
+ {
30
+ "scenario": 3,
31
+ "name": "Civil LoRA (draft_civil_response)",
32
+ "passed": true,
33
+ "elapsed_s": 38.797,
34
+ "error": null,
35
+ "detail": {
36
+ "text_preview": "์š”์ฒญ์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
37
+ }
38
+ },
39
+ {
40
+ "scenario": 4,
41
+ "name": "Legal LoRA (append_evidence)",
42
+ "passed": false,
43
+ "elapsed_s": 25.521,
44
+ "error": "๋ฒ•๋ น ํŒจํ„ด ๋ฏธ๋ฐœ๊ฒฌ (['์ œ\\\\s*\\\\d+\\\\s*์กฐ', '์ œ\\\\s*\\\\d+\\\\s*ํ•ญ', '๋ฒ•๋ฅ ']...)",
45
+ "detail": {
46
+ "has_legal_keyword": false,
47
+ "matched_patterns": [],
48
+ "text_preview": "์š”์ฒญ์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
49
+ }
50
+ },
51
+ {
52
+ "scenario": 5,
53
+ "name": "Sequential Multi-LoRA Switching",
54
+ "passed": true,
55
+ "elapsed_s": 146.962,
56
+ "error": null,
57
+ "detail": {
58
+ "iterations": 3,
59
+ "all_passed": true
60
+ }
61
+ },
62
+ {
63
+ "scenario": 6,
64
+ "name": "LoRA ID Consistency Check",
65
+ "passed": true,
66
+ "elapsed_s": 1.889,
67
+ "error": null,
68
+ "detail": {
69
+ "health_status": "healthy",
70
+ "agents_loaded": [
71
+ "generator_civil_response",
72
+ "retriever"
73
+ ],
74
+ "model": "LGAI-EXAONE/EXAONE-4.0-32B-AWQ",
75
+ "feature_flags": {
76
+ "use_rag_pipeline": true,
77
+ "model_version": "v2_lora"
78
+ },
79
+ "warning": "์–ด๋Œ‘ํ„ฐ ๋ฏธ๊ฐ์ง€ (vLLM ๋ฒ„์ „์— ๋”ฐ๋ผ ์ •์ƒ): civil, legal"
80
+ }
81
+ }
82
+ ]
83
+ }